├── .gitignore
├── .travis.yml
├── LICENSE
├── README.md
├── bounded.go
├── cafs.go
├── chunking
│   ├── adler32
│   │   ├── adler32.go
│   │   └── adler32_test.go
│   ├── chunker.go
│   ├── chunker_test.go
│   └── cmd
│       └── chunktool
│           └── chunktool.go
├── encoding.go
├── go.mod
├── printer.go
├── ram
│   ├── ramstorage.go
│   └── ramstorage_test.go
└── remotesync
    ├── httpsync
    │   ├── cmd
    │   │   └── synctest
    │   │       └── synctest.go
    │   ├── httpsync.go
    │   └── util.go
    ├── receive.go
    ├── remotesync.go
    ├── remotesync_test.go
    ├── send.go
    ├── shuffle
    │   ├── shuffle.go
    │   └── shuffle_test.go
    ├── syncinfo.go
    ├── syncinfo_test.go
    └── util.go
/.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | bin/ 3 | pkg/ 4 | 5 | # Compiled Object files, Static and Dynamic libs (Shared Objects) 6 | *.o 7 | *.a 8 | *.so 9 | 10 | # Folders 11 | _obj 12 | _test 13 | 14 | # Architecture specific extensions/prefixes 15 | *.[568vq] 16 | [568vq].out 17 | 18 | *.cgo1.go 19 | *.cgo2.c 20 | _cgo_defun.c 21 | _cgo_gotypes.go 22 | _cgo_export.* 23 | 24 | _testmain.go 25 | 26 | *.exe 27 | *.test 28 | *.prof 29 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: go 2 | 3 | os: 4 | - linux 5 | - osx 6 | 7 | go: 8 | - 1.11.x 9 | - 1.12.x 10 | - tip 11 | 12 | matrix: 13 | allow_failures: 14 | - go: tip 15 | 16 | script: 17 | - go test ./... 18 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | GNU GENERAL PUBLIC LICENSE 2 | Version 3, 29 June 2007 3 | 4 | Copyright (C) 2007 Free Software Foundation, Inc. 5 | Everyone is permitted to copy and distribute verbatim copies 6 | of this license document, but changing it is not allowed. 7 | 8 | Preamble 9 | 10 | The GNU General Public License is a free, copyleft license for 11 | software and other kinds of works. 12 | 13 | The licenses for most software and other practical works are designed 14 | to take away your freedom to share and change the works. By contrast, 15 | the GNU General Public License is intended to guarantee your freedom to 16 | share and change all versions of a program--to make sure it remains free 17 | software for all its users. We, the Free Software Foundation, use the 18 | GNU General Public License for most of our software; it applies also to 19 | any other work released this way by its authors. You can apply it to 20 | your programs, too. 21 | 22 | When we speak of free software, we are referring to freedom, not 23 | price. Our General Public Licenses are designed to make sure that you 24 | have the freedom to distribute copies of free software (and charge for 25 | them if you wish), that you receive source code or can get it if you 26 | want it, that you can change the software or use pieces of it in new 27 | free programs, and that you know you can do these things. 28 | 29 | To protect your rights, we need to prevent others from denying you 30 | these rights or asking you to surrender the rights. Therefore, you have 31 | certain responsibilities if you distribute copies of the software, or if 32 | you modify it: responsibilities to respect the freedom of others. 33 | 34 | For example, if you distribute copies of such a program, whether 35 | gratis or for a fee, you must pass on to the recipients the same 36 | freedoms that you received.
You must make sure that they, too, receive 37 | or can get the source code. And you must show them these terms so they 38 | know their rights. 39 | 40 | Developers that use the GNU GPL protect your rights with two steps: 41 | (1) assert copyright on the software, and (2) offer you this License 42 | giving you legal permission to copy, distribute and/or modify it. 43 | 44 | For the developers' and authors' protection, the GPL clearly explains 45 | that there is no warranty for this free software. For both users' and 46 | authors' sake, the GPL requires that modified versions be marked as 47 | changed, so that their problems will not be attributed erroneously to 48 | authors of previous versions. 49 | 50 | Some devices are designed to deny users access to install or run 51 | modified versions of the software inside them, although the manufacturer 52 | can do so. This is fundamentally incompatible with the aim of 53 | protecting users' freedom to change the software. The systematic 54 | pattern of such abuse occurs in the area of products for individuals to 55 | use, which is precisely where it is most unacceptable. Therefore, we 56 | have designed this version of the GPL to prohibit the practice for those 57 | products. If such problems arise substantially in other domains, we 58 | stand ready to extend this provision to those domains in future versions 59 | of the GPL, as needed to protect the freedom of users. 60 | 61 | Finally, every program is threatened constantly by software patents. 62 | States should not allow patents to restrict development and use of 63 | software on general-purpose computers, but in those that do, we wish to 64 | avoid the special danger that patents applied to a free program could 65 | make it effectively proprietary. To prevent this, the GPL assures that 66 | patents cannot be used to render the program non-free. 67 | 68 | The precise terms and conditions for copying, distribution and 69 | modification follow. 70 | 71 | TERMS AND CONDITIONS 72 | 73 | 0. Definitions. 74 | 75 | "This License" refers to version 3 of the GNU General Public License. 76 | 77 | "Copyright" also means copyright-like laws that apply to other kinds of 78 | works, such as semiconductor masks. 79 | 80 | "The Program" refers to any copyrightable work licensed under this 81 | License. Each licensee is addressed as "you". "Licensees" and 82 | "recipients" may be individuals or organizations. 83 | 84 | To "modify" a work means to copy from or adapt all or part of the work 85 | in a fashion requiring copyright permission, other than the making of an 86 | exact copy. The resulting work is called a "modified version" of the 87 | earlier work or a work "based on" the earlier work. 88 | 89 | A "covered work" means either the unmodified Program or a work based 90 | on the Program. 91 | 92 | To "propagate" a work means to do anything with it that, without 93 | permission, would make you directly or secondarily liable for 94 | infringement under applicable copyright law, except executing it on a 95 | computer or modifying a private copy. Propagation includes copying, 96 | distribution (with or without modification), making available to the 97 | public, and in some countries other activities as well. 98 | 99 | To "convey" a work means any kind of propagation that enables other 100 | parties to make or receive copies. Mere interaction with a user through 101 | a computer network, with no transfer of a copy, is not conveying. 
102 | 103 | An interactive user interface displays "Appropriate Legal Notices" 104 | to the extent that it includes a convenient and prominently visible 105 | feature that (1) displays an appropriate copyright notice, and (2) 106 | tells the user that there is no warranty for the work (except to the 107 | extent that warranties are provided), that licensees may convey the 108 | work under this License, and how to view a copy of this License. If 109 | the interface presents a list of user commands or options, such as a 110 | menu, a prominent item in the list meets this criterion. 111 | 112 | 1. Source Code. 113 | 114 | The "source code" for a work means the preferred form of the work 115 | for making modifications to it. "Object code" means any non-source 116 | form of a work. 117 | 118 | A "Standard Interface" means an interface that either is an official 119 | standard defined by a recognized standards body, or, in the case of 120 | interfaces specified for a particular programming language, one that 121 | is widely used among developers working in that language. 122 | 123 | The "System Libraries" of an executable work include anything, other 124 | than the work as a whole, that (a) is included in the normal form of 125 | packaging a Major Component, but which is not part of that Major 126 | Component, and (b) serves only to enable use of the work with that 127 | Major Component, or to implement a Standard Interface for which an 128 | implementation is available to the public in source code form. A 129 | "Major Component", in this context, means a major essential component 130 | (kernel, window system, and so on) of the specific operating system 131 | (if any) on which the executable work runs, or a compiler used to 132 | produce the work, or an object code interpreter used to run it. 133 | 134 | The "Corresponding Source" for a work in object code form means all 135 | the source code needed to generate, install, and (for an executable 136 | work) run the object code and to modify the work, including scripts to 137 | control those activities. However, it does not include the work's 138 | System Libraries, or general-purpose tools or generally available free 139 | programs which are used unmodified in performing those activities but 140 | which are not part of the work. For example, Corresponding Source 141 | includes interface definition files associated with source files for 142 | the work, and the source code for shared libraries and dynamically 143 | linked subprograms that the work is specifically designed to require, 144 | such as by intimate data communication or control flow between those 145 | subprograms and other parts of the work. 146 | 147 | The Corresponding Source need not include anything that users 148 | can regenerate automatically from other parts of the Corresponding 149 | Source. 150 | 151 | The Corresponding Source for a work in source code form is that 152 | same work. 153 | 154 | 2. Basic Permissions. 155 | 156 | All rights granted under this License are granted for the term of 157 | copyright on the Program, and are irrevocable provided the stated 158 | conditions are met. This License explicitly affirms your unlimited 159 | permission to run the unmodified Program. The output from running a 160 | covered work is covered by this License only if the output, given its 161 | content, constitutes a covered work. This License acknowledges your 162 | rights of fair use or other equivalent, as provided by copyright law. 
163 | 164 | You may make, run and propagate covered works that you do not 165 | convey, without conditions so long as your license otherwise remains 166 | in force. You may convey covered works to others for the sole purpose 167 | of having them make modifications exclusively for you, or provide you 168 | with facilities for running those works, provided that you comply with 169 | the terms of this License in conveying all material for which you do 170 | not control copyright. Those thus making or running the covered works 171 | for you must do so exclusively on your behalf, under your direction 172 | and control, on terms that prohibit them from making any copies of 173 | your copyrighted material outside their relationship with you. 174 | 175 | Conveying under any other circumstances is permitted solely under 176 | the conditions stated below. Sublicensing is not allowed; section 10 177 | makes it unnecessary. 178 | 179 | 3. Protecting Users' Legal Rights From Anti-Circumvention Law. 180 | 181 | No covered work shall be deemed part of an effective technological 182 | measure under any applicable law fulfilling obligations under article 183 | 11 of the WIPO copyright treaty adopted on 20 December 1996, or 184 | similar laws prohibiting or restricting circumvention of such 185 | measures. 186 | 187 | When you convey a covered work, you waive any legal power to forbid 188 | circumvention of technological measures to the extent such circumvention 189 | is effected by exercising rights under this License with respect to 190 | the covered work, and you disclaim any intention to limit operation or 191 | modification of the work as a means of enforcing, against the work's 192 | users, your or third parties' legal rights to forbid circumvention of 193 | technological measures. 194 | 195 | 4. Conveying Verbatim Copies. 196 | 197 | You may convey verbatim copies of the Program's source code as you 198 | receive it, in any medium, provided that you conspicuously and 199 | appropriately publish on each copy an appropriate copyright notice; 200 | keep intact all notices stating that this License and any 201 | non-permissive terms added in accord with section 7 apply to the code; 202 | keep intact all notices of the absence of any warranty; and give all 203 | recipients a copy of this License along with the Program. 204 | 205 | You may charge any price or no price for each copy that you convey, 206 | and you may offer support or warranty protection for a fee. 207 | 208 | 5. Conveying Modified Source Versions. 209 | 210 | You may convey a work based on the Program, or the modifications to 211 | produce it from the Program, in the form of source code under the 212 | terms of section 4, provided that you also meet all of these conditions: 213 | 214 | a) The work must carry prominent notices stating that you modified 215 | it, and giving a relevant date. 216 | 217 | b) The work must carry prominent notices stating that it is 218 | released under this License and any conditions added under section 219 | 7. This requirement modifies the requirement in section 4 to 220 | "keep intact all notices". 221 | 222 | c) You must license the entire work, as a whole, under this 223 | License to anyone who comes into possession of a copy. This 224 | License will therefore apply, along with any applicable section 7 225 | additional terms, to the whole of the work, and all its parts, 226 | regardless of how they are packaged. 
This License gives no 227 | permission to license the work in any other way, but it does not 228 | invalidate such permission if you have separately received it. 229 | 230 | d) If the work has interactive user interfaces, each must display 231 | Appropriate Legal Notices; however, if the Program has interactive 232 | interfaces that do not display Appropriate Legal Notices, your 233 | work need not make them do so. 234 | 235 | A compilation of a covered work with other separate and independent 236 | works, which are not by their nature extensions of the covered work, 237 | and which are not combined with it such as to form a larger program, 238 | in or on a volume of a storage or distribution medium, is called an 239 | "aggregate" if the compilation and its resulting copyright are not 240 | used to limit the access or legal rights of the compilation's users 241 | beyond what the individual works permit. Inclusion of a covered work 242 | in an aggregate does not cause this License to apply to the other 243 | parts of the aggregate. 244 | 245 | 6. Conveying Non-Source Forms. 246 | 247 | You may convey a covered work in object code form under the terms 248 | of sections 4 and 5, provided that you also convey the 249 | machine-readable Corresponding Source under the terms of this License, 250 | in one of these ways: 251 | 252 | a) Convey the object code in, or embodied in, a physical product 253 | (including a physical distribution medium), accompanied by the 254 | Corresponding Source fixed on a durable physical medium 255 | customarily used for software interchange. 256 | 257 | b) Convey the object code in, or embodied in, a physical product 258 | (including a physical distribution medium), accompanied by a 259 | written offer, valid for at least three years and valid for as 260 | long as you offer spare parts or customer support for that product 261 | model, to give anyone who possesses the object code either (1) a 262 | copy of the Corresponding Source for all the software in the 263 | product that is covered by this License, on a durable physical 264 | medium customarily used for software interchange, for a price no 265 | more than your reasonable cost of physically performing this 266 | conveying of source, or (2) access to copy the 267 | Corresponding Source from a network server at no charge. 268 | 269 | c) Convey individual copies of the object code with a copy of the 270 | written offer to provide the Corresponding Source. This 271 | alternative is allowed only occasionally and noncommercially, and 272 | only if you received the object code with such an offer, in accord 273 | with subsection 6b. 274 | 275 | d) Convey the object code by offering access from a designated 276 | place (gratis or for a charge), and offer equivalent access to the 277 | Corresponding Source in the same way through the same place at no 278 | further charge. You need not require recipients to copy the 279 | Corresponding Source along with the object code. If the place to 280 | copy the object code is a network server, the Corresponding Source 281 | may be on a different server (operated by you or a third party) 282 | that supports equivalent copying facilities, provided you maintain 283 | clear directions next to the object code saying where to find the 284 | Corresponding Source. Regardless of what server hosts the 285 | Corresponding Source, you remain obligated to ensure that it is 286 | available for as long as needed to satisfy these requirements. 
287 | 288 | e) Convey the object code using peer-to-peer transmission, provided 289 | you inform other peers where the object code and Corresponding 290 | Source of the work are being offered to the general public at no 291 | charge under subsection 6d. 292 | 293 | A separable portion of the object code, whose source code is excluded 294 | from the Corresponding Source as a System Library, need not be 295 | included in conveying the object code work. 296 | 297 | A "User Product" is either (1) a "consumer product", which means any 298 | tangible personal property which is normally used for personal, family, 299 | or household purposes, or (2) anything designed or sold for incorporation 300 | into a dwelling. In determining whether a product is a consumer product, 301 | doubtful cases shall be resolved in favor of coverage. For a particular 302 | product received by a particular user, "normally used" refers to a 303 | typical or common use of that class of product, regardless of the status 304 | of the particular user or of the way in which the particular user 305 | actually uses, or expects or is expected to use, the product. A product 306 | is a consumer product regardless of whether the product has substantial 307 | commercial, industrial or non-consumer uses, unless such uses represent 308 | the only significant mode of use of the product. 309 | 310 | "Installation Information" for a User Product means any methods, 311 | procedures, authorization keys, or other information required to install 312 | and execute modified versions of a covered work in that User Product from 313 | a modified version of its Corresponding Source. The information must 314 | suffice to ensure that the continued functioning of the modified object 315 | code is in no case prevented or interfered with solely because 316 | modification has been made. 317 | 318 | If you convey an object code work under this section in, or with, or 319 | specifically for use in, a User Product, and the conveying occurs as 320 | part of a transaction in which the right of possession and use of the 321 | User Product is transferred to the recipient in perpetuity or for a 322 | fixed term (regardless of how the transaction is characterized), the 323 | Corresponding Source conveyed under this section must be accompanied 324 | by the Installation Information. But this requirement does not apply 325 | if neither you nor any third party retains the ability to install 326 | modified object code on the User Product (for example, the work has 327 | been installed in ROM). 328 | 329 | The requirement to provide Installation Information does not include a 330 | requirement to continue to provide support service, warranty, or updates 331 | for a work that has been modified or installed by the recipient, or for 332 | the User Product in which it has been modified or installed. Access to a 333 | network may be denied when the modification itself materially and 334 | adversely affects the operation of the network or violates the rules and 335 | protocols for communication across the network. 336 | 337 | Corresponding Source conveyed, and Installation Information provided, 338 | in accord with this section must be in a format that is publicly 339 | documented (and with an implementation available to the public in 340 | source code form), and must require no special password or key for 341 | unpacking, reading or copying. 342 | 343 | 7. Additional Terms. 
344 | 345 | "Additional permissions" are terms that supplement the terms of this 346 | License by making exceptions from one or more of its conditions. 347 | Additional permissions that are applicable to the entire Program shall 348 | be treated as though they were included in this License, to the extent 349 | that they are valid under applicable law. If additional permissions 350 | apply only to part of the Program, that part may be used separately 351 | under those permissions, but the entire Program remains governed by 352 | this License without regard to the additional permissions. 353 | 354 | When you convey a copy of a covered work, you may at your option 355 | remove any additional permissions from that copy, or from any part of 356 | it. (Additional permissions may be written to require their own 357 | removal in certain cases when you modify the work.) You may place 358 | additional permissions on material, added by you to a covered work, 359 | for which you have or can give appropriate copyright permission. 360 | 361 | Notwithstanding any other provision of this License, for material you 362 | add to a covered work, you may (if authorized by the copyright holders of 363 | that material) supplement the terms of this License with terms: 364 | 365 | a) Disclaiming warranty or limiting liability differently from the 366 | terms of sections 15 and 16 of this License; or 367 | 368 | b) Requiring preservation of specified reasonable legal notices or 369 | author attributions in that material or in the Appropriate Legal 370 | Notices displayed by works containing it; or 371 | 372 | c) Prohibiting misrepresentation of the origin of that material, or 373 | requiring that modified versions of such material be marked in 374 | reasonable ways as different from the original version; or 375 | 376 | d) Limiting the use for publicity purposes of names of licensors or 377 | authors of the material; or 378 | 379 | e) Declining to grant rights under trademark law for use of some 380 | trade names, trademarks, or service marks; or 381 | 382 | f) Requiring indemnification of licensors and authors of that 383 | material by anyone who conveys the material (or modified versions of 384 | it) with contractual assumptions of liability to the recipient, for 385 | any liability that these contractual assumptions directly impose on 386 | those licensors and authors. 387 | 388 | All other non-permissive additional terms are considered "further 389 | restrictions" within the meaning of section 10. If the Program as you 390 | received it, or any part of it, contains a notice stating that it is 391 | governed by this License along with a term that is a further 392 | restriction, you may remove that term. If a license document contains 393 | a further restriction but permits relicensing or conveying under this 394 | License, you may add to a covered work material governed by the terms 395 | of that license document, provided that the further restriction does 396 | not survive such relicensing or conveying. 397 | 398 | If you add terms to a covered work in accord with this section, you 399 | must place, in the relevant source files, a statement of the 400 | additional terms that apply to those files, or a notice indicating 401 | where to find the applicable terms. 402 | 403 | Additional terms, permissive or non-permissive, may be stated in the 404 | form of a separately written license, or stated as exceptions; 405 | the above requirements apply either way. 406 | 407 | 8. Termination. 
408 | 409 | You may not propagate or modify a covered work except as expressly 410 | provided under this License. Any attempt otherwise to propagate or 411 | modify it is void, and will automatically terminate your rights under 412 | this License (including any patent licenses granted under the third 413 | paragraph of section 11). 414 | 415 | However, if you cease all violation of this License, then your 416 | license from a particular copyright holder is reinstated (a) 417 | provisionally, unless and until the copyright holder explicitly and 418 | finally terminates your license, and (b) permanently, if the copyright 419 | holder fails to notify you of the violation by some reasonable means 420 | prior to 60 days after the cessation. 421 | 422 | Moreover, your license from a particular copyright holder is 423 | reinstated permanently if the copyright holder notifies you of the 424 | violation by some reasonable means, this is the first time you have 425 | received notice of violation of this License (for any work) from that 426 | copyright holder, and you cure the violation prior to 30 days after 427 | your receipt of the notice. 428 | 429 | Termination of your rights under this section does not terminate the 430 | licenses of parties who have received copies or rights from you under 431 | this License. If your rights have been terminated and not permanently 432 | reinstated, you do not qualify to receive new licenses for the same 433 | material under section 10. 434 | 435 | 9. Acceptance Not Required for Having Copies. 436 | 437 | You are not required to accept this License in order to receive or 438 | run a copy of the Program. Ancillary propagation of a covered work 439 | occurring solely as a consequence of using peer-to-peer transmission 440 | to receive a copy likewise does not require acceptance. However, 441 | nothing other than this License grants you permission to propagate or 442 | modify any covered work. These actions infringe copyright if you do 443 | not accept this License. Therefore, by modifying or propagating a 444 | covered work, you indicate your acceptance of this License to do so. 445 | 446 | 10. Automatic Licensing of Downstream Recipients. 447 | 448 | Each time you convey a covered work, the recipient automatically 449 | receives a license from the original licensors, to run, modify and 450 | propagate that work, subject to this License. You are not responsible 451 | for enforcing compliance by third parties with this License. 452 | 453 | An "entity transaction" is a transaction transferring control of an 454 | organization, or substantially all assets of one, or subdividing an 455 | organization, or merging organizations. If propagation of a covered 456 | work results from an entity transaction, each party to that 457 | transaction who receives a copy of the work also receives whatever 458 | licenses to the work the party's predecessor in interest had or could 459 | give under the previous paragraph, plus a right to possession of the 460 | Corresponding Source of the work from the predecessor in interest, if 461 | the predecessor has it or can get it with reasonable efforts. 462 | 463 | You may not impose any further restrictions on the exercise of the 464 | rights granted or affirmed under this License. 
For example, you may 465 | not impose a license fee, royalty, or other charge for exercise of 466 | rights granted under this License, and you may not initiate litigation 467 | (including a cross-claim or counterclaim in a lawsuit) alleging that 468 | any patent claim is infringed by making, using, selling, offering for 469 | sale, or importing the Program or any portion of it. 470 | 471 | 11. Patents. 472 | 473 | A "contributor" is a copyright holder who authorizes use under this 474 | License of the Program or a work on which the Program is based. The 475 | work thus licensed is called the contributor's "contributor version". 476 | 477 | A contributor's "essential patent claims" are all patent claims 478 | owned or controlled by the contributor, whether already acquired or 479 | hereafter acquired, that would be infringed by some manner, permitted 480 | by this License, of making, using, or selling its contributor version, 481 | but do not include claims that would be infringed only as a 482 | consequence of further modification of the contributor version. For 483 | purposes of this definition, "control" includes the right to grant 484 | patent sublicenses in a manner consistent with the requirements of 485 | this License. 486 | 487 | Each contributor grants you a non-exclusive, worldwide, royalty-free 488 | patent license under the contributor's essential patent claims, to 489 | make, use, sell, offer for sale, import and otherwise run, modify and 490 | propagate the contents of its contributor version. 491 | 492 | In the following three paragraphs, a "patent license" is any express 493 | agreement or commitment, however denominated, not to enforce a patent 494 | (such as an express permission to practice a patent or covenant not to 495 | sue for patent infringement). To "grant" such a patent license to a 496 | party means to make such an agreement or commitment not to enforce a 497 | patent against the party. 498 | 499 | If you convey a covered work, knowingly relying on a patent license, 500 | and the Corresponding Source of the work is not available for anyone 501 | to copy, free of charge and under the terms of this License, through a 502 | publicly available network server or other readily accessible means, 503 | then you must either (1) cause the Corresponding Source to be so 504 | available, or (2) arrange to deprive yourself of the benefit of the 505 | patent license for this particular work, or (3) arrange, in a manner 506 | consistent with the requirements of this License, to extend the patent 507 | license to downstream recipients. "Knowingly relying" means you have 508 | actual knowledge that, but for the patent license, your conveying the 509 | covered work in a country, or your recipient's use of the covered work 510 | in a country, would infringe one or more identifiable patents in that 511 | country that you have reason to believe are valid. 512 | 513 | If, pursuant to or in connection with a single transaction or 514 | arrangement, you convey, or propagate by procuring conveyance of, a 515 | covered work, and grant a patent license to some of the parties 516 | receiving the covered work authorizing them to use, propagate, modify 517 | or convey a specific copy of the covered work, then the patent license 518 | you grant is automatically extended to all recipients of the covered 519 | work and works based on it. 
520 | 521 | A patent license is "discriminatory" if it does not include within 522 | the scope of its coverage, prohibits the exercise of, or is 523 | conditioned on the non-exercise of one or more of the rights that are 524 | specifically granted under this License. You may not convey a covered 525 | work if you are a party to an arrangement with a third party that is 526 | in the business of distributing software, under which you make payment 527 | to the third party based on the extent of your activity of conveying 528 | the work, and under which the third party grants, to any of the 529 | parties who would receive the covered work from you, a discriminatory 530 | patent license (a) in connection with copies of the covered work 531 | conveyed by you (or copies made from those copies), or (b) primarily 532 | for and in connection with specific products or compilations that 533 | contain the covered work, unless you entered into that arrangement, 534 | or that patent license was granted, prior to 28 March 2007. 535 | 536 | Nothing in this License shall be construed as excluding or limiting 537 | any implied license or other defenses to infringement that may 538 | otherwise be available to you under applicable patent law. 539 | 540 | 12. No Surrender of Others' Freedom. 541 | 542 | If conditions are imposed on you (whether by court order, agreement or 543 | otherwise) that contradict the conditions of this License, they do not 544 | excuse you from the conditions of this License. If you cannot convey a 545 | covered work so as to satisfy simultaneously your obligations under this 546 | License and any other pertinent obligations, then as a consequence you may 547 | not convey it at all. For example, if you agree to terms that obligate you 548 | to collect a royalty for further conveying from those to whom you convey 549 | the Program, the only way you could satisfy both those terms and this 550 | License would be to refrain entirely from conveying the Program. 551 | 552 | 13. Use with the GNU Affero General Public License. 553 | 554 | Notwithstanding any other provision of this License, you have 555 | permission to link or combine any covered work with a work licensed 556 | under version 3 of the GNU Affero General Public License into a single 557 | combined work, and to convey the resulting work. The terms of this 558 | License will continue to apply to the part which is the covered work, 559 | but the special requirements of the GNU Affero General Public License, 560 | section 13, concerning interaction through a network will apply to the 561 | combination as such. 562 | 563 | 14. Revised Versions of this License. 564 | 565 | The Free Software Foundation may publish revised and/or new versions of 566 | the GNU General Public License from time to time. Such new versions will 567 | be similar in spirit to the present version, but may differ in detail to 568 | address new problems or concerns. 569 | 570 | Each version is given a distinguishing version number. If the 571 | Program specifies that a certain numbered version of the GNU General 572 | Public License "or any later version" applies to it, you have the 573 | option of following the terms and conditions either of that numbered 574 | version or of any later version published by the Free Software 575 | Foundation. If the Program does not specify a version number of the 576 | GNU General Public License, you may choose any version ever published 577 | by the Free Software Foundation. 
578 | 579 | If the Program specifies that a proxy can decide which future 580 | versions of the GNU General Public License can be used, that proxy's 581 | public statement of acceptance of a version permanently authorizes you 582 | to choose that version for the Program. 583 | 584 | Later license versions may give you additional or different 585 | permissions. However, no additional obligations are imposed on any 586 | author or copyright holder as a result of your choosing to follow a 587 | later version. 588 | 589 | 15. Disclaimer of Warranty. 590 | 591 | THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY 592 | APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT 593 | HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY 594 | OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, 595 | THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 596 | PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM 597 | IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF 598 | ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 599 | 600 | 16. Limitation of Liability. 601 | 602 | IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING 603 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS 604 | THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY 605 | GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE 606 | USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF 607 | DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD 608 | PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), 609 | EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF 610 | SUCH DAMAGES. 611 | 612 | 17. Interpretation of Sections 15 and 16. 613 | 614 | If the disclaimer of warranty and limitation of liability provided 615 | above cannot be given local legal effect according to their terms, 616 | reviewing courts shall apply local law that most closely approximates 617 | an absolute waiver of all civil liability in connection with the 618 | Program, unless a warranty or assumption of liability accompanies a 619 | copy of the Program in return for a fee. 620 | 621 | END OF TERMS AND CONDITIONS 622 | 623 | How to Apply These Terms to Your New Programs 624 | 625 | If you develop a new program, and you want it to be of the greatest 626 | possible use to the public, the best way to achieve this is to make it 627 | free software which everyone can redistribute and change under these terms. 628 | 629 | To do so, attach the following notices to the program. It is safest 630 | to attach them to the start of each source file to most effectively 631 | state the exclusion of warranty; and each file should have at least 632 | the "copyright" line and a pointer to where the full notice is found. 633 | 634 | {one line to give the program's name and a brief idea of what it does.} 635 | Copyright (C) {year} {name of author} 636 | 637 | This program is free software: you can redistribute it and/or modify 638 | it under the terms of the GNU General Public License as published by 639 | the Free Software Foundation, either version 3 of the License, or 640 | (at your option) any later version. 
641 | 642 | This program is distributed in the hope that it will be useful, 643 | but WITHOUT ANY WARRANTY; without even the implied warranty of 644 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 645 | GNU General Public License for more details. 646 | 647 | You should have received a copy of the GNU General Public License 648 | along with this program. If not, see . 649 | 650 | Also add information on how to contact you by electronic and paper mail. 651 | 652 | If the program does terminal interaction, make it output a short 653 | notice like this when it starts in an interactive mode: 654 | 655 | {project} Copyright (C) {year} {fullname} 656 | This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'. 657 | This is free software, and you are welcome to redistribute it 658 | under certain conditions; type `show c' for details. 659 | 660 | The hypothetical commands `show w' and `show c' should show the appropriate 661 | parts of the General Public License. Of course, your program's commands 662 | might be different; for a GUI interface, you would use an "about box". 663 | 664 | You should also get your employer (if you work as a programmer) or school, 665 | if any, to sign a "copyright disclaimer" for the program, if necessary. 666 | For more information on this, and how to apply and follow the GNU GPL, see 667 | . 668 | 669 | The GNU General Public License does not permit incorporating your program 670 | into proprietary programs. If your program is a subroutine library, you 671 | may consider it more useful to permit linking proprietary applications with 672 | the library. If this is what you want to do, use the GNU Lesser General 673 | Public License instead of this License. But first, please read 674 | . 675 | 676 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | cafs 2 | ==== 3 | [![Build Status](https://travis-ci.org/indyjo/cafs.svg)](https://travis-ci.org/indyjo/cafs) 4 | 5 | Content-Addressable File System. 6 | 7 | This is the data caching back-end used by the BitWrk distributed computing 8 | software. See https://bitwrk.net/ for more info. 9 | 10 | Stores data in de-duplicated form and provides a remote-synching mechanism with 11 | another CAFS instance. 12 | 13 | Data no longer referenced is kept in cache until the space is needed. 14 | Currently, data is not saved to persistent storage. -------------------------------------------------------------------------------- /bounded.go: -------------------------------------------------------------------------------- 1 | // BitWrk - A Bitcoin-friendly, anonymous marketplace for computing power 2 | // Copyright (C) 2013-2018 Jonas Eschenburg 3 | // 4 | // This program is free software: you can redistribute it and/or modify 5 | // it under the terms of the GNU General Public License as published by 6 | // the Free Software Foundation, either version 3 of the License, or 7 | // (at your option) any later version. 8 | // 9 | // This program is distributed in the hope that it will be useful, 10 | // but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | // GNU General Public License for more details. 13 | // 14 | // You should have received a copy of the GNU General Public License 15 | // along with this program. If not, see . 
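//
// Illustrative usage sketch for the UsageInfo and BoundedStorage types defined below
// (not part of the original sources; assumes a concrete BoundedStorage value named
// store and a byte count named needed):
//
//	ui := store.GetUsageInfo()
//	if ui.Used+needed > ui.Capacity {
//		// Drop all cached data that is not locked externally.
//		freed := store.FreeCache()
//		_ = freed
//	}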
16 | 17 | package cafs 18 | 19 | import ( 20 | "fmt" 21 | ) 22 | 23 | // Type UsageInfo contains information about how many bytes are used, locked and available 24 | // by a BoundedStorage. 25 | type UsageInfo struct { 26 | Used int64 // The number of bytes used by the storage 27 | Capacity int64 // The maximum number of bytes usable by the storage 28 | Locked int64 // The number of bytes currently locked by the storage 29 | } 30 | 31 | func (ui UsageInfo) String() string { 32 | return fmt.Sprintf("%d of %d kb used with %d kb locked", kb(ui.Used), kb(ui.Capacity), kb(ui.Locked)) 33 | } 34 | 35 | func kb(v int64) int64 { 36 | return (v + 1023) >> 10 37 | } 38 | 39 | // Interface BoundedStorage describes file storage with bounded capacity 40 | type BoundedStorage interface { 41 | FileStorage 42 | 43 | GetUsageInfo() UsageInfo 44 | 45 | // Clears any data that is not locked externally and returns the number of bytes freed. 46 | FreeCache() int64 47 | } 48 | -------------------------------------------------------------------------------- /cafs.go: -------------------------------------------------------------------------------- 1 | // BitWrk - A Bitcoin-friendly, anonymous marketplace for computing power 2 | // Copyright (C) 2013-2017 Jonas Eschenburg 3 | // 4 | // This program is free software: you can redistribute it and/or modify 5 | // it under the terms of the GNU General Public License as published by 6 | // the Free Software Foundation, either version 3 of the License, or 7 | // (at your option) any later version. 8 | // 9 | // This program is distributed in the hope that it will be useful, 10 | // but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | // GNU General Public License for more details. 13 | // 14 | // You should have received a copy of the GNU General Public License 15 | // along with this program. If not, see . 16 | 17 | // Package cafs specifies a content-addressable file storage with support 18 | // for de-duplication and remote syncing. 19 | package cafs 20 | 21 | import ( 22 | "encoding/hex" 23 | "errors" 24 | "io" 25 | ) 26 | 27 | var ErrNotFound = errors.New("Not found") 28 | var ErrStillOpen = errors.New("Temporary still open") 29 | var ErrInvalidState = errors.New("Invalid temporary state") 30 | var ErrNotEnoughSpace = errors.New("Not enough space") 31 | 32 | var LoggingEnabled = false 33 | 34 | type SKey [32]byte 35 | 36 | type FileStorage interface { 37 | // Creates a new temporary that can be written into. The info string will stick 38 | // with the temporary and also with the file, should it be created, and serves only 39 | // informational purposes. 40 | Create(info string) Temporary 41 | 42 | // Queries a file from the storage that can be read from. If the file exists, a File 43 | // interface is returned that has been locked once and that must be released correctly. 44 | // If the file does not exist, then (nil, ErrNotFound) is returned. 45 | Get(key *SKey) (File, error) 46 | 47 | DumpStatistics(log Printer) 48 | } 49 | 50 | type File interface { 51 | // Signals that this file handle is no longer in use. 52 | // If no handles exist on a file anymore, the storage space 53 | // bound by it can be reclaimed by the garbage collector. 54 | // It is an error to call Open() or Duplicate() after Dispose(). 55 | // It is ok to call Dispose() more than once.
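// Illustrative lock/release sketch (not part of the original documentation; assumes a
// FileStorage value named storage and an SKey named key):
//
//	f, err := storage.Get(&key)
//	if err != nil {
//		return err // e.g. ErrNotFound
//	}
//	defer f.Dispose()
//	r := f.Open()
//	defer r.Close()
//	// ... read the file's content from r ...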
56 | Dispose() 57 | Key() SKey 58 | Open() io.ReadCloser 59 | Size() int64 60 | // Creates a new handle to the same file that must be Dispose()'d 61 | // independently. 62 | Duplicate() File 63 | 64 | // Returns true if the file is stored in chunks internally. 65 | // It is an error to call this function after Dispose(). 66 | IsChunked() bool 67 | // Returns an iterator to the chunks of the file. The iterator must be disposed after use. 68 | Chunks() FileIterator 69 | // Returns the number of chunks in this file, or 1 if file is not chunked 70 | NumChunks() int64 71 | } 72 | 73 | // Iterate over a set of files or chunks. 74 | type FileIterator interface { 75 | // Must be called after using this iterator. 76 | Dispose() 77 | // Returns a copy of this iterator that must be Dispose()'d independently. 78 | Duplicate() FileIterator 79 | 80 | // Advances the iterator and returns true if successful, or false if no further chunks 81 | // could be read. 82 | // Must be called before calling File(). 83 | Next() bool 84 | 85 | // Returns the key of the last file or chunk successfully read by Next(). 86 | // Before calling this function, Next() must have been called and returned true. 87 | Key() SKey 88 | 89 | // Returns the size of the last file or chunk successfully read by Next(). 90 | // Before calling this function, Next() must have been called and returned true. 91 | Size() int64 92 | 93 | // Returns the last file or chunk successfully read by Next() as a file. 94 | // The received File must be Dispose()'d. 95 | // Before calling this function, Next() must have been called and returned true. 96 | File() File 97 | } 98 | 99 | type Temporary interface { 100 | // Stores the temporary file into the FileStorage, where it 101 | // can be retrieved by key - after Close() has been called. 102 | io.WriteCloser 103 | 104 | // Returns a handle to the stored file, once Close() has been 105 | // called and no error occurred. Otherwise, panics. 106 | File() File 107 | 108 | // Must be called when the temporary is no longer needed. 109 | // It's ok to call Dispose() more than once. 110 | Dispose() 111 | } 112 | 113 | func (k SKey) String() string { 114 | return hex.EncodeToString(k[:]) 115 | } 116 | 117 | func ParseKey(s string) (*SKey, error) { 118 | if len(s) != 64 { 119 | return nil, errors.New("Invalid key length") 120 | } 121 | 122 | var result SKey 123 | if b, err := hex.DecodeString(s); err != nil { 124 | return nil, err 125 | } else { 126 | copy(result[:], b) 127 | } 128 | 129 | return &result, nil 130 | } 131 | 132 | func MustParseKey(s string) *SKey { 133 | if key, err := ParseKey(s); err != nil { 134 | panic(err) 135 | } else { 136 | return key 137 | } 138 | } 139 | -------------------------------------------------------------------------------- /chunking/adler32/adler32.go: -------------------------------------------------------------------------------- 1 | // Copyright 2009 The Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | // Package adler32 implements a chunker based on the Adler-32 checksum. 6 | // 7 | // This package is based on Go's original package "hash/adler32" adapted 8 | // to support using Adler-32 as a chunking algorithm. 9 | // 10 | // Adler-32 is defined in RFC 1950: 11 | // Adler-32 is composed of two sums accumulated per byte: s1 is 12 | // the sum of all bytes, s2 is the sum of all s1 values. Both sums 13 | // are done modulo 65521. s1 is initialized to 1, s2 to zero. 
The 14 | // Adler-32 checksum is stored as s2*65536 + s1 in most- 15 | // significant-byte first (network) order. 16 | package adler32 17 | 18 | const ( 19 | // mod is the largest prime that is less than 65536. 20 | mod = 65521 21 | // nmax_push is the largest n such that 22 | // 255 * n * (n+1) / 2 + (n+1) * (mod-1) <= 2^32-1. 23 | // It is mentioned in RFC 1950 (search for "5552"). 24 | nmax_push = 5552 25 | 26 | // In popFront(), we must multiply with the size parameter, 27 | // which can be much higher (up to 65520) -- Jonas 28 | nmax_pop = 256 29 | 30 | WINDOW_SIZE = 48 31 | MIN_CHUNK = 128 32 | MAX_CHUNK = 131072 33 | ) 34 | 35 | // type Adler32Chunker implements the Chunker interface based on the Adler-32 checksum. 36 | type Adler32Chunker struct { 37 | a uint32 38 | n, p int 39 | window [WINDOW_SIZE]byte 40 | } 41 | 42 | // Function NewChunker returns a new Chunker. 43 | func NewChunker() *Adler32Chunker { 44 | var c = Adler32Chunker{a: 1} 45 | return &c 46 | } 47 | 48 | func (c *Adler32Chunker) Scan(data []byte) int { 49 | if len(data) == 0 { 50 | return 0 51 | } 52 | 53 | prefixLen := 0 54 | // Initially, fill window 55 | if c.n < WINDOW_SIZE { 56 | prefixLen = WINDOW_SIZE - c.n 57 | if len(data) < prefixLen { 58 | prefixLen = len(data) 59 | } 60 | c.a = pushBack(c.a, data[:prefixLen]) 61 | c.n += prefixLen 62 | copy(c.window[c.p:c.p+prefixLen], data[:prefixLen]) 63 | c.p += prefixLen 64 | if c.p == WINDOW_SIZE { 65 | c.p = 0 66 | } 67 | data = data[prefixLen:] 68 | } 69 | 70 | for i, _ := range data { 71 | c.a = popFront(c.a, c.window[c.p:c.p+1], WINDOW_SIZE) 72 | c.window[c.p] = data[i] 73 | c.a = pushBack(c.a, data[i:i+1]) 74 | c.n++ 75 | 76 | // Chunk boundary at MAX_CHUNK or if hash is 4159 modulo 8191 (both are prime) 77 | if c.n > MIN_CHUNK && 4159 == (c.a%8191) || c.n > MAX_CHUNK { 78 | // Reset chunker and return position in data 79 | *c = Adler32Chunker{a: 1} 80 | return i + prefixLen // Byte will become beginning of next segment 81 | } 82 | 83 | c.p++ 84 | if c.p == WINDOW_SIZE { 85 | c.p = 0 86 | } 87 | } 88 | 89 | return len(data) + prefixLen 90 | } 91 | 92 | // Add p to the running checksum d. 93 | func pushBack(d uint32, p []byte) uint32 { 94 | s1, s2 := uint32(d&0xffff), uint32(d>>16) 95 | for len(p) > 0 { 96 | var q []byte 97 | if len(p) > nmax_push { 98 | p, q = p[:nmax_push], p[nmax_push:] 99 | } 100 | for _, x := range p { 101 | s1 += uint32(x) 102 | s2 += s1 103 | } 104 | s1 %= mod 105 | s2 %= mod 106 | p = q 107 | } 108 | return uint32(s2<<16 | s1) 109 | } 110 | 111 | // Remove p from the front of the running checksum d. 112 | // size is the number of elements in the hash before popFront is executed. 113 | func popFront(d uint32, p []byte, size int) uint32 { 114 | s1, s2 := uint32(d&0xffff), uint32(d>>16) 115 | if size >= mod { 116 | size %= mod 117 | } 118 | for len(p) > 0 { 119 | var q []byte 120 | var run = nmax_pop 121 | if size < run { 122 | run = size 123 | } 124 | if len(p) > run { 125 | p, q = p[:run], p[run:] 126 | } 127 | s1 += 65550 * mod // Maximum x = 0 (mod 65521) so x+65520 is still uint32 128 | s2 += 65550 * mod 129 | for _, x := range p { 130 | s1 -= uint32(x) 131 | s2 -= uint32(size)*uint32(x) + 1 132 | size-- 133 | } 134 | s1 %= mod 135 | s2 %= mod 136 | size %= mod 137 | p = q 138 | } 139 | return uint32(s2<<16 | s1) 140 | } 141 | 142 | // Checksum returns the Adler-32 checksum of data. 
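//
// Rolling-update sketch (a derivation added for clarity, not text from the original
// package): for a window x_1..x_n, s1 = 1 + x_1 + ... + x_n and
// s2 = n + n*x_1 + (n-1)*x_2 + ... + 1*x_n, both modulo 65521. Appending a byte y
// updates s1' = s1 + y and s2' = s2 + s1', which is what pushBack computes per byte.
// Removing the front byte x_1 from a window of size n updates s1' = s1 - x_1 and
// s2' = s2 - n*x_1 - 1, which is what popFront's "s2 -= size*x + 1" implements.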
143 | func Checksum(data []byte) uint32 { return pushBack(1, data) } 144 | -------------------------------------------------------------------------------- /chunking/adler32/adler32_test.go: -------------------------------------------------------------------------------- 1 | package adler32 2 | 3 | import ( 4 | "hash/adler32" 5 | "math/rand" 6 | "testing" 7 | ) 8 | 9 | const LEN = 4096 10 | 11 | func TestAdler32(t *testing.T) { 12 | data := make([]byte, LEN) 13 | for i := 0; i < LEN; i++ { 14 | data[i] = byte(rand.Int()) 15 | } 16 | //t.Logf("Data is: %x", data) 17 | 18 | for chunk := 1; chunk <= 2048; chunk++ { 19 | for run := 0; run <= LEN; run += chunk { 20 | var d uint32 21 | d = 1 22 | for i := 0; i <= LEN-chunk; i += chunk { 23 | if i >= run { 24 | d2 := Checksum(data[i-run : i]) 25 | if uint32(d) != d2 { 26 | t.Fatalf("Failed for [%d..%d), d=%08x, d2=%08x chunk=%d", i-run, i, d, d2, chunk) 27 | } 28 | //t.Logf("Adler32 of [%6d..%6d) is %08x - %08x", i-run, i, d, d2) 29 | } 30 | d = pushBack(d, data[i:i+chunk]) 31 | if i+chunk > run { 32 | d = popFront(d, data[i-run:i+chunk-run], run+chunk) 33 | } 34 | } 35 | } 36 | } 37 | } 38 | 39 | func TestOverflow(t *testing.T) { 40 | data := make([]byte, 65536) 41 | for k, _ := range data { 42 | data[k] = 255 43 | } 44 | for i := 17343; i <= 17343; i++ { 45 | d := uint32(1) 46 | d = pushBack(d, data[:i]) 47 | if uint32(d) != adler32.Checksum(data[:i]) { 48 | t.Fatalf("pushBack seems to be wrong") 49 | } 50 | d = popFront(d, data[:i], i) 51 | if d != uint32(1) { 52 | t.Errorf("Overflow at length %d detected: d=%08x", i, d) 53 | } 54 | } 55 | } 56 | 57 | var data = generateSample() 58 | 59 | func generateSample() []byte { 60 | result := make([]byte, 16384) 61 | for k, _ := range result { 62 | result[k] = byte(rand.Int()) 63 | } 64 | return result 65 | } 66 | 67 | func BenchmarkPushBack(b *testing.B) { 68 | d := uint32(1) 69 | for i := 0; i < b.N; i++ { 70 | j := i & 16383 71 | pushBack(d, data[j:j+1]) 72 | } 73 | } 74 | 75 | func BenchmarkPushBack16(b *testing.B) { 76 | d := uint32(1) 77 | for i := 0; i < b.N; i += 16 { 78 | j := i & 16383 79 | pushBack(d, data[j:j+16]) 80 | } 81 | } 82 | 83 | func BenchmarkPopFront(b *testing.B) { 84 | d := uint32(1) 85 | for i := 0; i < b.N; i++ { 86 | j := i & 16383 87 | popFront(d, data[j:j+1], 33) 88 | } 89 | } 90 | 91 | func BenchmarkPopFront16(b *testing.B) { 92 | d := uint32(1) 93 | for i := 0; i < b.N; i += 16 { 94 | j := i & 16383 95 | popFront(d, data[j:j+16], 33) 96 | } 97 | } 98 | -------------------------------------------------------------------------------- /chunking/chunker.go: -------------------------------------------------------------------------------- 1 | // BitWrk - A Bitcoin-friendly, anonymous marketplace for computing power 2 | // Copyright (C) 2013-2017 Jonas Eschenburg 3 | // 4 | // This program is free software: you can redistribute it and/or modify 5 | // it under the terms of the GNU General Public License as published by 6 | // the Free Software Foundation, either version 3 of the License, or 7 | // (at your option) any later version. 8 | // 9 | // This program is distributed in the hope that it will be useful, 10 | // but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | // GNU General Public License for more details. 13 | // 14 | // You should have received a copy of the GNU General Public License 15 | // along with this program. If not, see . 
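//
// Illustrative driver loop for a Chunker (a sketch mirroring chunker_test.go further
// below, not additional API):
//
//	chunker := chunking.New()
//	for i := 0; i < len(data); {
//		block := data[i:]
//		n := chunker.Scan(block)
//		i += n
//		if n < len(block) {
//			// Chunk boundary at offset i; data[i] starts the next chunk.
//		}
//	}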
16 | 17 | // Package chunking implements an algorithm for content-based chunking of arbitrary files. 18 | package chunking 19 | 20 | import "github.com/indyjo/cafs/chunking/adler32" 21 | 22 | const ( 23 | MaxChunkSize = adler32.MAX_CHUNK 24 | ) 25 | 26 | type Chunker interface { 27 | // Scans the byte sequence for chunk boundaries. 28 | // Returns the number of bytes from data that can be added to the current chunk. 29 | // A return value of len(data) means that no chunk boundary has been found in this block. 30 | Scan(data []byte) int 31 | } 32 | 33 | // Function New returns a new chunker. 34 | func New() Chunker { 35 | return adler32.NewChunker() 36 | } 37 | -------------------------------------------------------------------------------- /chunking/chunker_test.go: -------------------------------------------------------------------------------- 1 | package chunking 2 | 3 | import ( 4 | "math/rand" 5 | "testing" 6 | ) 7 | 8 | func TestChunker(t *testing.T) { 9 | data := make([]byte, 1<<24) 10 | for k, _ := range data { 11 | data[k] = byte(rand.Int()) 12 | } 13 | 14 | chunker := New() 15 | chunkCount := 0 16 | for i := 0; i < len(data); { 17 | block := data[i:] 18 | bytes := chunker.Scan(block) 19 | i += bytes 20 | if bytes < len(block) { 21 | t.Logf("Chunk at %d", i) 22 | chunkCount++ 23 | } 24 | } 25 | 26 | t.Logf("Generated %d chunks, avg size: %d bytes", chunkCount, len(data)/chunkCount) 27 | } 28 | 29 | func TestSingleByteBlocks(t *testing.T) { 30 | size := (1 << 17) * 10000 31 | chunker := New() 32 | blocks := 0 33 | r := rand.New(rand.NewSource(0)) 34 | for i := 0; i < size; i++ { 35 | val := r.Int() 36 | for j := uint(1); j < 24; j++ { 37 | val ^= val >> j 38 | } 39 | if chunker.Scan([]byte{byte(val)}) == 0 { 40 | blocks++ 41 | } 42 | size-- 43 | } 44 | blocks += 1 45 | if blocks <= size/(1<<17) { 46 | t.Errorf("Test produced only %v blocks -> no chunk boundaries found", blocks) 47 | } else { 48 | t.Logf("Test produced %v blocks (avg size: %d)", blocks, size/blocks) 49 | } 50 | } 51 | -------------------------------------------------------------------------------- /chunking/cmd/chunktool/chunktool.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "bytes" 5 | "crypto/sha256" 6 | "flag" 7 | "fmt" 8 | "github.com/indyjo/cafs/chunking" 9 | "io" 10 | "os" 11 | "sort" 12 | ) 13 | 14 | const APP_VERSION = "0.1" 15 | 16 | // The flag package provides a default help printer via -h switch 17 | var versionFlag = flag.Bool("v", false, "Print the version number.") 18 | var numFingers = flag.Int("n", 5, "Number of fingers in handprint.") 19 | var matrixMode = flag.Bool("m", false, "Display similarity matrix.") 20 | var printChunks = flag.Bool("c", false, "Print chunks on the go.") 21 | 22 | func main() { 23 | flag.Parse() // Scan the arguments list 24 | 25 | if *versionFlag { 26 | fmt.Println("Version:", APP_VERSION) 27 | } 28 | 29 | fingerprints := make(map[string]bool) 30 | 31 | for _, arg := range flag.Args() { 32 | if handprint, err := chunkFile(arg, *numFingers, !*matrixMode, *printChunks); err != nil { 33 | fmt.Println("Failed: ", err) 34 | } else { 35 | for _, fingerprint := range handprint.Fingerprints { 36 | fingerprints[fmt.Sprintf("%16x", fingerprint[:8])] = true 37 | } 38 | } 39 | } 40 | 41 | if *matrixMode { 42 | printFingerprintMatrix(fingerprints) 43 | } 44 | } 45 | 46 | func printFingerprintMatrix(fingerprints map[string]bool) { 47 | allFingers := make([]string, 0, len(fingerprints)) 48 | for k := range 
fingerprints { 49 | allFingers = append(allFingers, k) 50 | } 51 | sort.Strings(allFingers) 52 | for _, arg := range flag.Args() { 53 | if handprint, err := chunkFile(arg, *numFingers, false, false); err != nil { 54 | fmt.Println("Failed: ", err) 55 | } else { 56 | fingerprintsInHandprint := make(map[string]bool) 57 | for _, fingerprint := range handprint.Fingerprints { 58 | fingerprintsInHandprint[fmt.Sprintf("%16x", fingerprint[:8])] = true 59 | } 60 | row := make([]byte, 0, 32) 61 | for _, fingerprint := range allFingers { 62 | if _, ok := fingerprintsInHandprint[fingerprint]; ok { 63 | row = append(row, '.') 64 | } else { 65 | row = append(row, ' ') 66 | } 67 | } 68 | fmt.Printf("%s %s\n", row, arg) 69 | } 70 | } 71 | } 72 | 73 | type Handprint struct { 74 | Fingerprints [][]byte 75 | } 76 | 77 | func NewHandprint(size int) *Handprint { 78 | return &Handprint{make([][]byte, 0, size)} 79 | } 80 | 81 | func (h *Handprint) String() string { 82 | result := make([]byte, 0, 20) 83 | for _, v := range h.Fingerprints { 84 | result = append(result, []byte(fmt.Sprintf("%4x", v[:2]))...) 85 | } 86 | return string(result) 87 | } 88 | 89 | func (h *Handprint) Insert(fingerprint []byte) { 90 | for i, other := range h.Fingerprints { 91 | if bytes.Compare(other, fingerprint) <= 0 { 92 | continue 93 | } 94 | fingerprint, h.Fingerprints[i] = other, fingerprint 95 | } 96 | if len(h.Fingerprints) < cap(h.Fingerprints) { 97 | h.Fingerprints = append(h.Fingerprints, fingerprint) 98 | } 99 | } 100 | 101 | func chunkFile(filename string, size int, printSummary, printChunks bool) (*Handprint, error) { 102 | handprint := NewHandprint(size) 103 | //fmt.Printf("Chunking %s\n", filename) 104 | fi, err := os.Open(filename) 105 | if err != nil { 106 | return nil, err 107 | } 108 | defer fi.Close() 109 | 110 | buffer := make([]byte, 16384) 111 | chunker := chunking.New() 112 | 113 | numChunks := 1 114 | numBytes := 0 115 | sha := sha256.New() 116 | chunkLen := 0 117 | for { 118 | n, err := fi.Read(buffer) 119 | if err != nil && err != io.EOF { 120 | return nil, err 121 | } 122 | if n == 0 { 123 | handprint.Insert(sha.Sum(make([]byte, 0, 32))) 124 | break 125 | } 126 | numBytes += n 127 | slice := buffer[:n] 128 | for len(slice) > 0 { 129 | bytesInChunk := chunker.Scan(slice) 130 | chunkLen += bytesInChunk 131 | sha.Write(slice[:bytesInChunk]) 132 | if bytesInChunk < len(slice) { 133 | handprint.Insert(sha.Sum(make([]byte, 0, 32))) 134 | if printChunks { 135 | fmt.Printf(" %6d %032x\n", chunkLen, sha.Sum(make([]byte, 0, 32))) 136 | } 137 | sha.Reset() 138 | chunkLen = 0 139 | numChunks++ 140 | } 141 | slice = slice[bytesInChunk:] 142 | } 143 | } 144 | 145 | //fmt.Printf("Generated %d chunks on avg %d bytes long.\n", numChunks, numBytes/numChunks) 146 | if printSummary { 147 | fmt.Printf("%-20s %s\n", handprint, filename) 148 | } 149 | return handprint, nil 150 | } 151 | -------------------------------------------------------------------------------- /encoding.go: -------------------------------------------------------------------------------- 1 | // BitWrk - A Bitcoin-friendly, anonymous marketplace for computing power 2 | // Copyright (C) 2013-2019 Jonas Eschenburg 3 | // 4 | // This program is free software: you can redistribute it and/or modify 5 | // it under the terms of the GNU General Public License as published by 6 | // the Free Software Foundation, either version 3 of the License, or 7 | // (at your option) any later version. 
8 | // 9 | // This program is distributed in the hope that it will be useful, 10 | // but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | // GNU General Public License for more details. 13 | // 14 | // You should have received a copy of the GNU General Public License 15 | // along with this program. If not, see . 16 | 17 | package cafs 18 | 19 | import ( 20 | "encoding/hex" 21 | "encoding/json" 22 | ) 23 | 24 | var _ json.Marshaler = SKey{} 25 | var _ json.Unmarshaler = &SKey{} 26 | 27 | func (k SKey) MarshalJSON() ([]byte, error) { 28 | l := hex.EncodedLen(len(k)) + 2 29 | dst := make([]byte, l) 30 | dst[0] = '"' 31 | dst[l-1] = '"' 32 | hex.Encode(dst[1:l-1], k[:]) 33 | return dst, nil 34 | } 35 | 36 | func (k *SKey) UnmarshalJSON(b []byte) error { 37 | var s string 38 | if err := json.Unmarshal(b, &s); err != nil { 39 | return err 40 | } 41 | if _, err := hex.Decode(k[:], []byte(s)); err != nil { 42 | return err 43 | } 44 | return nil 45 | } 46 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/indyjo/cafs 2 | 3 | go 1.12 4 | -------------------------------------------------------------------------------- /printer.go: -------------------------------------------------------------------------------- 1 | // BitWrk - A Bitcoin-friendly, anonymous marketplace for computing power 2 | // Copyright (C) 2013-2017 Jonas Eschenburg 3 | // 4 | // This program is free software: you can redistribute it and/or modify 5 | // it under the terms of the GNU General Public License as published by 6 | // the Free Software Foundation, either version 3 of the License, or 7 | // (at your option) any later version. 8 | // 9 | // This program is distributed in the hope that it will be useful, 10 | // but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | // GNU General Public License for more details. 13 | // 14 | // You should have received a copy of the GNU General Public License 15 | // along with this program. If not, see . 16 | 17 | package cafs 18 | 19 | import ( 20 | "fmt" 21 | "io" 22 | ) 23 | 24 | // Interface Printer is used by CAFS for debugging output 25 | type Printer interface { 26 | Printf(format string, v ...interface{}) 27 | } 28 | 29 | type writerPrinter struct { 30 | w io.Writer 31 | } 32 | 33 | func NewWriterPrinter(w io.Writer) Printer { 34 | return writerPrinter{w} 35 | } 36 | 37 | func (p writerPrinter) Printf(format string, v ...interface{}) { 38 | if len(format) == 0 || format[len(format)-1] != '\n' { 39 | format = format + "\n" 40 | } 41 | fmt.Fprintf(p.w, format, v...) 42 | } 43 | -------------------------------------------------------------------------------- /ram/ramstorage.go: -------------------------------------------------------------------------------- 1 | // BitWrk - A Bitcoin-friendly, anonymous marketplace for computing power 2 | // Copyright (C) 2013-2018 Jonas Eschenburg 3 | // 4 | // This program is free software: you can redistribute it and/or modify 5 | // it under the terms of the GNU General Public License as published by 6 | // the Free Software Foundation, either version 3 of the License, or 7 | // (at your option) any later version. 
8 | // 9 | // This program is distributed in the hope that it will be useful, 10 | // but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | // GNU General Public License for more details. 13 | // 14 | // You should have received a copy of the GNU General Public License 15 | // along with this program. If not, see . 16 | 17 | // This package implements a content-addressable file storage that keeps its 18 | // data in RAM. 19 | package ram 20 | 21 | import ( 22 | "bytes" 23 | "crypto/sha256" 24 | "fmt" 25 | . "github.com/indyjo/cafs" 26 | "github.com/indyjo/cafs/chunking" 27 | "hash" 28 | "io" 29 | "log" 30 | "sync" 31 | ) 32 | 33 | type ramStorage struct { 34 | mutex sync.Mutex 35 | entries map[SKey]*ramEntry 36 | bytesUsed, bytesMax int64 37 | bytesLocked int64 38 | youngest, oldest SKey 39 | } 40 | 41 | type ramFile struct { 42 | storage *ramStorage 43 | key SKey 44 | entry *ramEntry 45 | disposed bool 46 | } 47 | 48 | type chunkRef struct { 49 | key SKey 50 | // Points to the byte position within the file immediately after this chunk 51 | nextPos int64 52 | } 53 | 54 | type ramEntry struct { 55 | // Keys to the next older and next younger entry 56 | younger, older SKey 57 | info string 58 | // Holds data if entry is of simple kind 59 | data []byte 60 | // Holds a list of chunk positions if entry is of chunk list type 61 | chunks []chunkRef 62 | refs int 63 | } 64 | 65 | type ramDataReader struct { 66 | data []byte 67 | index int 68 | } 69 | 70 | type ramChunkReader struct { 71 | storage *ramStorage // Storage to read from 72 | entry *ramEntry // Entry containing the chunks 73 | key SKey // SKey of that entry 74 | chunksTail []chunkRef // Remaining chunks 75 | closed bool // Whether Close() has been called 76 | dataReader io.ReadCloser 77 | } 78 | 79 | type ramTemporary struct { 80 | storage *ramStorage 81 | info string // Info text given by user identifying the current file 82 | buffer bytes.Buffer // Stores bytes since beginning of current chunk 83 | fileHash hash.Hash // hash since the beginning of the file 84 | chunkHash hash.Hash // hash since the beginning of the current chunk 85 | valid bool // If false, something has gone wrong 86 | open bool // Set to false on Close() 87 | chunker chunking.Chunker // Determines chunk boundaries 88 | chunks []chunkRef // Grows every time a chunk boundary is encountered 89 | } 90 | 91 | func NewRamStorage(maxBytes int64) BoundedStorage { 92 | return &ramStorage{ 93 | entries: make(map[SKey]*ramEntry), 94 | bytesMax: maxBytes, 95 | } 96 | } 97 | 98 | func (s *ramStorage) GetUsageInfo() UsageInfo { 99 | s.mutex.Lock() 100 | defer s.mutex.Unlock() 101 | return UsageInfo{Used: s.bytesUsed, Capacity: s.bytesMax, Locked: s.bytesLocked} 102 | } 103 | 104 | func (s *ramStorage) FreeCache() int64 { 105 | s.mutex.Lock() 106 | defer s.mutex.Unlock() 107 | oldBytesUsed := s.bytesUsed 108 | s.reserveBytes("FreeCache", s.bytesMax) 109 | return oldBytesUsed - s.bytesUsed 110 | } 111 | 112 | func (s *ramStorage) Get(key *SKey) (File, error) { 113 | s.mutex.Lock() 114 | entry, ok := s.entries[*key] 115 | if ok { 116 | if entry.refs == 0 { 117 | s.removeFromChain(key, entry) 118 | s.bytesLocked += entry.storageSize() 119 | } 120 | entry.refs++ 121 | } 122 | s.mutex.Unlock() 123 | if ok { 124 | return &ramFile{s, *key, entry, false}, nil 125 | } else { 126 | return nil, ErrNotFound 127 | } 128 | return nil, nil // never reached 129 | } 130 | 131 | func (s *ramStorage) Create(info 
string) Temporary { 132 | return &ramTemporary{ 133 | storage: s, 134 | info: info, 135 | fileHash: sha256.New(), 136 | chunkHash: sha256.New(), 137 | valid: true, 138 | open: true, 139 | chunker: chunking.New(), 140 | chunks: make([]chunkRef, 0, 16), 141 | } 142 | } 143 | 144 | func (s *ramStorage) DumpStatistics(log Printer) { 145 | s.mutex.Lock() 146 | defer s.mutex.Unlock() 147 | 148 | link := func(k SKey, n int, local bool) string { 149 | zero := SKey{} 150 | if k == zero { 151 | return fmt.Sprintf("%x", k[:n]) 152 | } else if local { 153 | return fmt.Sprintf(`%x`, k, k[:n]) 154 | } else { 155 | return fmt.Sprintf(`%x`, k, k[:n]) 156 | } 157 | } 158 | 159 | log.Printf("CAFS Statistics
")
160 | 	log.Printf("Bytes used: %d, locked: %d, oldest: %x, youngest: %x", s.bytesUsed, s.bytesLocked, s.oldest[:4], s.youngest[:4])
161 | 	for key, entry := range s.entries {
162 | 		log.Printf("  [%v] refs=%d size=%v [%v] %v (older) %v (younger)",
163 | 			link(key, 4, false), entry.refs, entry.storageSize(), entry.info,
164 | 			link(entry.older, 4, true), link(entry.younger, 4, true))
165 | 
166 | 		prevPos := int64(0)
167 | 		for i, chunk := range entry.chunks {
168 | 			log.Printf("             chunk %4d: %v (length %6d, ends at %7d)", i,
169 | 				link(chunk.key, 4, true), chunk.nextPos-prevPos, chunk.nextPos)
170 | 			prevPos = chunk.nextPos
171 | 		}
172 | 	}
173 | 	log.Printf("
") 174 | } 175 | 176 | func (s *ramStorage) reserveBytes(info string, numBytes int64) error { 177 | if numBytes > s.bytesMax { 178 | return ErrNotEnoughSpace 179 | } 180 | bytesFree := s.bytesMax - s.bytesUsed 181 | if bytesFree < numBytes && LoggingEnabled { 182 | log.Printf("[%v] Need to free %v (currently unlocked %v) more bytes of CAFS space to store object of size %v", 183 | info, numBytes-bytesFree, s.bytesUsed-s.bytesLocked, numBytes) 184 | } 185 | for bytesFree < numBytes { 186 | oldestKey := s.oldest 187 | oldestEntry := s.entries[oldestKey] 188 | if oldestEntry == nil { 189 | return ErrNotEnoughSpace 190 | } 191 | s.removeFromChain(&s.oldest, oldestEntry) 192 | delete(s.entries, oldestKey) 193 | 194 | oldLocked := s.bytesLocked 195 | // Dereference all referenced chunks 196 | for _, chunk := range oldestEntry.chunks { 197 | s.release(&chunk.key, s.entries[chunk.key]) 198 | } 199 | oldestSize := oldestEntry.storageSize() 200 | s.bytesUsed -= oldestSize 201 | bytesFree += oldestSize 202 | if LoggingEnabled { 203 | log.Printf("[%v] Deleted object of size %v bytes: [%v] %v", info, oldestSize, oldestEntry.info, oldestKey) 204 | if oldLocked != s.bytesLocked { 205 | log.Printf(" -> unlocked %d bytes", oldLocked-s.bytesLocked) 206 | } 207 | } 208 | } 209 | return nil 210 | } 211 | 212 | // Puts an entry into the store. If an entry already exists, it must be identical to the old one. 213 | // The newly-created or recycled entry has been lock'ed once and must be release'd properly. 214 | func (s *ramStorage) storeEntry(key *SKey, data []byte, chunks []chunkRef, info string) error { 215 | if len(data) > 0 && len(chunks) > 0 { 216 | panic("Illegal entry") 217 | } 218 | s.mutex.Lock() 219 | defer s.mutex.Unlock() 220 | 221 | // Detect if we're re-writing the same data (or even handle a hash collision) 222 | var newEntry *ramEntry 223 | if oldEntry := s.entries[*key]; oldEntry != nil { 224 | if len(oldEntry.data) != len(data) || len(oldEntry.chunks) != len(chunks) { 225 | panic(fmt.Sprintf("[%v] Key collision: %v [%v]", info, key, oldEntry.info)) 226 | } 227 | if LoggingEnabled { 228 | log.Printf("[%v] Recycling key: %v [%v] (data: %d bytes, chunks: %d)", info, key, oldEntry.info, len(data), len(chunks)) 229 | } 230 | 231 | // Ref the reused entry. 
232 | s.lock(key, oldEntry) 233 | 234 | // Unref all referenced chunks 235 | for _, chunk := range chunks { 236 | chunkEntry := s.entries[chunk.key] 237 | s.release(&chunk.key, chunkEntry) 238 | } 239 | 240 | // re-use old entry 241 | newEntry = oldEntry 242 | } else { 243 | newEntry = &ramEntry{ 244 | info: info, 245 | data: data, 246 | chunks: chunks, 247 | refs: 1, 248 | } 249 | // Reserve the necessary space for storing the object 250 | if err := s.reserveBytes(info, newEntry.storageSize()); err != nil { 251 | return err 252 | } 253 | 254 | s.entries[*key] = newEntry 255 | s.bytesUsed += newEntry.storageSize() 256 | s.bytesLocked += newEntry.storageSize() 257 | if LoggingEnabled { 258 | log.Printf("[%v] Stored key: %v (data: %d bytes, chunks: %d)", info, key, len(data), len(chunks)) 259 | } 260 | } 261 | 262 | return nil 263 | } 264 | 265 | func (s *ramStorage) removeFromChain(key *SKey, entry *ramEntry) { 266 | if youngerEntry := s.entries[entry.younger]; youngerEntry != nil { 267 | youngerEntry.older = entry.older 268 | } else if s.youngest == *key { 269 | s.youngest = entry.older 270 | } 271 | if olderEntry := s.entries[entry.older]; olderEntry != nil { 272 | olderEntry.younger = entry.younger 273 | } else if s.oldest == *key { 274 | s.oldest = entry.younger 275 | } 276 | // clear outgoing links 277 | entry.younger, entry.older = SKey{}, SKey{} 278 | } 279 | 280 | func (s *ramStorage) insertIntoChain(key *SKey, entry *ramEntry) { 281 | entry.older = s.youngest 282 | if youngestEntry := s.entries[s.youngest]; youngestEntry != nil { 283 | // chain former youngest entry to new one 284 | youngestEntry.younger = *key 285 | } else { 286 | // empty map, new entry will also be oldest 287 | s.oldest = *key 288 | } 289 | s.youngest = *key 290 | } 291 | 292 | // Mutex lock-protected version of lock() 293 | func (s *ramStorage) lockL(key *SKey, entry *ramEntry) { 294 | s.mutex.Lock() 295 | defer s.mutex.Unlock() 296 | s.lock(key, entry) 297 | } 298 | 299 | func (s *ramStorage) lock(key *SKey, entry *ramEntry) { 300 | if entry.refs == 0 { 301 | s.removeFromChain(key, entry) 302 | s.bytesLocked += entry.storageSize() 303 | } 304 | entry.refs++ 305 | } 306 | 307 | // Mutex lock-protected version of release() 308 | func (s *ramStorage) releaseL(key *SKey, entry *ramEntry) { 309 | s.mutex.Lock() 310 | defer s.mutex.Unlock() 311 | s.release(key, entry) 312 | } 313 | 314 | // Dereferences a single entry. Must happen while mutex is held. 315 | func (s *ramStorage) release(key *SKey, entry *ramEntry) { 316 | if entry.refs == 0 { 317 | panic(fmt.Sprintf("Can't release entry %v with 0 references", key)) 318 | } 319 | entry.refs-- 320 | if entry.refs == 0 { 321 | s.bytesLocked -= entry.storageSize() 322 | s.insertIntoChain(key, entry) 323 | } 324 | } 325 | 326 | // These are only estimates. Even an empty file consumes storage. 
327 | const entrySize = 112 328 | const chunkSize = 40 329 | 330 | func (e *ramEntry) storageSize() int64 { 331 | return int64(entrySize + len(e.data) + chunkSize*len(e.chunks)) 332 | } 333 | 334 | func (f *ramFile) Key() SKey { 335 | return f.key 336 | } 337 | 338 | func (f *ramFile) Open() io.ReadCloser { 339 | if len(f.entry.chunks) > 0 { 340 | f.storage.lockL(&f.key, f.entry) 341 | return &ramChunkReader{ 342 | storage: f.storage, 343 | entry: f.entry, 344 | key: f.key, 345 | chunksTail: f.entry.chunks, 346 | closed: false, 347 | } 348 | } else { 349 | return &ramDataReader{f.entry.data, 0} 350 | } 351 | } 352 | 353 | func (f *ramFile) Size() int64 { 354 | if f.entry.data != nil { 355 | return int64(len(f.entry.data)) 356 | } else { 357 | return f.entry.chunks[len(f.entry.chunks)-1].nextPos 358 | } 359 | } 360 | 361 | func (f *ramFile) Dispose() { 362 | if !f.disposed { 363 | f.disposed = true 364 | f.storage.releaseL(&f.key, f.entry) 365 | } 366 | } 367 | 368 | func (f *ramFile) checkValid() { 369 | if f.disposed { 370 | panic("Already disposed") 371 | } 372 | } 373 | 374 | func (f *ramFile) Duplicate() File { 375 | f.checkValid() 376 | file, err := f.storage.Get(&f.key) 377 | if err != nil { 378 | panic("Couldn't duplicate file") 379 | } 380 | return file 381 | } 382 | 383 | func (f *ramFile) IsChunked() bool { 384 | f.checkValid() 385 | return len(f.entry.chunks) > 0 386 | } 387 | 388 | func (f *ramFile) Chunks() FileIterator { 389 | var chunks []chunkRef 390 | if len(f.entry.chunks) > 0 { 391 | chunks = f.entry.chunks 392 | } else { 393 | chunks = make([]chunkRef, 1) 394 | chunks[0] = chunkRef{f.key, f.Size()} 395 | } 396 | f.storage.lockL(&f.key, f.entry) 397 | return &ramChunksIter{ 398 | storage: f.storage, 399 | entry: f.entry, 400 | key: f.key, 401 | chunks: chunks, 402 | chunkIdx: 0, 403 | lastChunkIdx: -1, 404 | disposed: false, 405 | } 406 | } 407 | 408 | func (f *ramFile) NumChunks() int64 { 409 | if len(f.entry.chunks) > 0 { 410 | return int64(len(f.entry.chunks)) 411 | } else { 412 | return 1 413 | } 414 | } 415 | 416 | func (ci *ramChunksIter) checkValid() { 417 | if ci.disposed { 418 | panic("Already disposed") 419 | } 420 | } 421 | 422 | type ramChunksIter struct { 423 | storage *ramStorage 424 | key SKey 425 | entry *ramEntry 426 | chunks []chunkRef 427 | chunkIdx int 428 | lastChunkIdx int 429 | disposed bool 430 | } 431 | 432 | func (ci *ramChunksIter) Dispose() { 433 | if !ci.disposed { 434 | ci.disposed = true 435 | ci.storage.releaseL(&ci.key, ci.entry) 436 | } 437 | } 438 | 439 | func (ci *ramChunksIter) Duplicate() FileIterator { 440 | ci.checkValid() 441 | ci.storage.lockL(&ci.key, ci.entry) 442 | return &ramChunksIter{ 443 | storage: ci.storage, 444 | key: ci.key, 445 | entry: ci.entry, 446 | chunks: ci.chunks, 447 | chunkIdx: ci.chunkIdx, 448 | disposed: false, 449 | } 450 | } 451 | 452 | func (ci *ramChunksIter) Next() bool { 453 | ci.checkValid() 454 | if ci.chunkIdx == len(ci.chunks) { 455 | ci.Dispose() 456 | return false 457 | } else { 458 | ci.lastChunkIdx = ci.chunkIdx 459 | ci.chunkIdx++ 460 | return true 461 | } 462 | } 463 | 464 | func (ci *ramChunksIter) Key() SKey { 465 | ci.checkValid() 466 | return ci.chunks[ci.lastChunkIdx].key 467 | } 468 | 469 | func (ci *ramChunksIter) Size() int64 { 470 | ci.checkValid() 471 | startPos := int64(0) 472 | if ci.lastChunkIdx > 0 { 473 | startPos = ci.chunks[ci.lastChunkIdx-1].nextPos 474 | } 475 | return ci.chunks[ci.lastChunkIdx].nextPos - startPos 476 | } 477 | 478 | func (ci *ramChunksIter) File() File { 
479 | ci.checkValid() 480 | if f, err := ci.storage.Get(&ci.chunks[ci.lastChunkIdx].key); err != nil { 481 | panic(err) 482 | } else { 483 | return f 484 | } 485 | } 486 | 487 | func (r *ramDataReader) Read(b []byte) (n int, err error) { 488 | if len(b) == 0 { 489 | return 0, nil 490 | } 491 | if r.index >= len(r.data) { 492 | return 0, io.EOF 493 | } 494 | n = copy(b, r.data[r.index:]) 495 | r.index += n 496 | return 497 | } 498 | 499 | func (r *ramDataReader) Close() error { 500 | return nil 501 | } 502 | 503 | func (r *ramChunkReader) Read(b []byte) (n int, err error) { 504 | if r.closed { 505 | err = ErrInvalidState 506 | return 507 | } 508 | for n == 0 && err == nil { 509 | if r.dataReader == nil { 510 | if len(r.chunksTail) > 0 { 511 | if f, e := r.storage.Get(&r.chunksTail[0].key); e != nil { 512 | panic(e) 513 | } else { 514 | defer f.Dispose() 515 | r.dataReader = f.Open() 516 | r.chunksTail = r.chunksTail[1:] 517 | } 518 | } else { 519 | return 0, io.EOF 520 | } 521 | } 522 | 523 | n, err = r.dataReader.Read(b) 524 | if err == io.EOF { 525 | // never pass through delegate EOF 526 | err = r.dataReader.Close() 527 | r.dataReader = nil 528 | } 529 | } 530 | return 531 | } 532 | 533 | func (r *ramChunkReader) Close() (err error) { 534 | if r.closed { 535 | return nil 536 | } 537 | r.closed = true 538 | 539 | r.storage.releaseL(&r.key, r.entry) 540 | 541 | if r.dataReader != nil { 542 | err = r.dataReader.Close() 543 | r.dataReader = nil 544 | } 545 | 546 | return 547 | } 548 | 549 | // Writes the current buffer into a new chunk and resets the buffer. 550 | // Assumes that chunkHash has already been updated. 551 | func (t *ramTemporary) flushBufferIntoChunk() error { 552 | if t.buffer.Len() == 0 { 553 | return nil 554 | } 555 | 556 | // Copy the chunk's data 557 | chunkInfo := fmt.Sprintf("%v #%d", t.info, len(t.chunks)) 558 | chunkData := make([]byte, t.buffer.Len()) 559 | copy(chunkData, t.buffer.Bytes()) 560 | 561 | // Get the chunk hash 562 | var key SKey 563 | t.chunkHash.Sum(key[:0]) 564 | t.chunkHash.Reset() 565 | 566 | if err := t.storage.storeEntry(&key, chunkData, nil, chunkInfo); err != nil { 567 | return err 568 | } 569 | 570 | chunk := chunkRef{ 571 | key: key, 572 | nextPos: int64(t.buffer.Len()), 573 | } 574 | if len(t.chunks) > 0 { 575 | chunk.nextPos += t.chunks[len(t.chunks)-1].nextPos 576 | } 577 | t.chunks = append(t.chunks, chunk) 578 | 579 | t.buffer.Reset() 580 | return nil 581 | } 582 | 583 | func (t *ramTemporary) Write(b []byte) (int, error) { 584 | if !t.valid || !t.open { 585 | return 0, ErrInvalidState 586 | } 587 | t.valid = false // only temporary -> set to true on successful end of function 588 | 589 | nBytes := len(b) 590 | 591 | for len(b) > 0 { 592 | nBoundary := t.chunker.Scan(b) 593 | if _, err := t.buffer.Write(b[:nBoundary]); err != nil { 594 | return 0, err 595 | } 596 | t.chunkHash.Write(b[:nBoundary]) 597 | t.fileHash.Write(b[:nBoundary]) 598 | if nBoundary < len(b) { 599 | // a chunk boundary was detected 600 | if err := t.flushBufferIntoChunk(); err != nil { 601 | return 0, err 602 | } 603 | b = b[nBoundary:] 604 | } else { 605 | b = nil 606 | } 607 | } 608 | 609 | t.valid = true 610 | return nBytes, nil 611 | } 612 | 613 | func (t *ramTemporary) Close() error { 614 | if !t.valid || !t.open { 615 | return ErrInvalidState 616 | } 617 | t.open = false 618 | t.valid = false // only temporary -> set to true on successful end of function 619 | var key SKey 620 | t.fileHash.Sum(key[:0]) 621 | 622 | if len(t.chunks) == 0 { 623 | // File is 
single-chunk 624 | data := make([]byte, t.buffer.Len()) 625 | copy(data, t.buffer.Bytes()) 626 | if err := t.storage.storeEntry(&key, data, nil, t.info); err != nil { 627 | return err 628 | } 629 | } else { 630 | // Flush buffer contents into one last chunk 631 | if err := t.flushBufferIntoChunk(); err != nil { 632 | return err 633 | } 634 | finalChunks := make([]chunkRef, len(t.chunks)) 635 | copy(finalChunks, t.chunks) 636 | if err := t.storage.storeEntry(&key, nil, finalChunks, t.info); err != nil { 637 | return err 638 | } 639 | } 640 | t.valid = true 641 | return nil 642 | } 643 | 644 | func (t *ramTemporary) File() File { 645 | if !t.valid { 646 | panic(ErrInvalidState) 647 | } 648 | if t.open { 649 | panic(ErrStillOpen) 650 | } 651 | 652 | var key SKey 653 | t.fileHash.Sum(key[:0]) 654 | 655 | file, err := t.storage.Get(&key) 656 | if err != nil { 657 | // Shouldn't happen 658 | panic(err) 659 | } 660 | return file 661 | } 662 | 663 | func (t *ramTemporary) Dispose() { 664 | if t.chunks == nil { 665 | // temporary was already disposed, we allow this 666 | return 667 | } 668 | 669 | t.releaseFromStorage() 670 | 671 | t.valid = false 672 | wasOpen := t.open 673 | t.open = false 674 | t.buffer = bytes.Buffer{} 675 | t.chunker = nil 676 | t.chunks = nil 677 | if LoggingEnabled { 678 | if wasOpen { 679 | log.Printf("[%v] Temporary canceled", t.info) 680 | } else { 681 | log.Printf("[%v] Temporary disposed", t.info) 682 | } 683 | } 684 | } 685 | 686 | // Calls release() on all chunks locked by this temporary. 687 | func (t *ramTemporary) releaseFromStorage() { 688 | t.storage.mutex.Lock() 689 | defer t.storage.mutex.Unlock() 690 | 691 | // dereference single-chunk entry if successfully closed 692 | if !t.open && t.valid { 693 | var key SKey 694 | t.fileHash.Sum(key[:0]) 695 | t.storage.release(&key, t.storage.entries[key]) 696 | } else { 697 | // dereference all locked chunks otherwise 698 | // (they have been locked once just by storing them) 699 | for _, chunk := range t.chunks { 700 | t.storage.release(&chunk.key, t.storage.entries[chunk.key]) 701 | } 702 | } 703 | } 704 | -------------------------------------------------------------------------------- /ram/ramstorage_test.go: -------------------------------------------------------------------------------- 1 | package ram 2 | 3 | import ( 4 | "fmt" 5 | . 
"github.com/indyjo/cafs" 6 | "io" 7 | "math/rand" 8 | "testing" 9 | ) 10 | 11 | func TestSimple(t *testing.T) { 12 | s := NewRamStorage(1000) 13 | _ = addData(t, s, 128) 14 | } 15 | 16 | func TestTwo(t *testing.T) { 17 | s := NewRamStorage(1000) 18 | f1 := addData(t, s, 128) 19 | f2 := addData(t, s, 256) 20 | if f1.Key() == f2.Key() { 21 | t.FailNow() 22 | } 23 | } 24 | 25 | func TestSame(t *testing.T) { 26 | s := NewRamStorage(1000) 27 | f1 := addData(t, s, 128) 28 | f2 := addData(t, s, 128) 29 | if f1.Key() != f2.Key() { 30 | t.FailNow() 31 | } 32 | } 33 | 34 | func TestEmptyFile(t *testing.T) { 35 | s := NewRamStorage(1000) 36 | f := addData(t, s, 0) 37 | if f.Size() != 0 { 38 | t.FailNow() 39 | } 40 | iter := f.Chunks() 41 | if !iter.Next() { 42 | t.Fatal("Expected empty file to have at least one chunk") 43 | } 44 | if iter.Key().String() != "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855" { 45 | t.Fatalf("Unexpected key of empty chunk: %v", iter.Key()) 46 | } 47 | if iter.Next() { 48 | t.Fatal("Expected empty file to not have any further chunks") 49 | } 50 | } 51 | 52 | type logPrinter struct { 53 | } 54 | 55 | func (p logPrinter) Printf(format string, v ...interface{}) { 56 | fmt.Printf(format+"\n", v...) 57 | } 58 | 59 | func TestLRU(t *testing.T) { 60 | s := NewRamStorage(1000) 61 | f1 := addData(t, s, 400) 62 | f1.Dispose() 63 | //s.DumpStatistics(logPrinter{}) 64 | f2 := addData(t, s, 350) 65 | f2.Dispose() 66 | //s.DumpStatistics(logPrinter{}) 67 | f3 := addData(t, s, 250) 68 | f3.Dispose() 69 | //s.DumpStatistics(logPrinter{}) 70 | f4 := addData(t, s, 450) 71 | f4.Dispose() 72 | //s.DumpStatistics(logPrinter{}) 73 | var key SKey 74 | key = f1.Key() 75 | if _, err := s.Get(&key); err != ErrNotFound { 76 | t.Fatalf("f1 should have been removed. err:%v", err) 77 | } 78 | key = f2.Key() 79 | if _, err := s.Get(&key); err != ErrNotFound { 80 | t.Fatalf("f2 should have been removed. err:%v", err) 81 | } 82 | key = f4.Key() 83 | if f, err := s.Get(&key); err != nil { 84 | t.Fatalf("f4 should be stored. err:%v", err) 85 | } else { 86 | f.Dispose() 87 | } 88 | key = f3.Key() 89 | if f, err := s.Get(&key); err != nil { 90 | t.Fatalf("f3 should not have been removed. err:%v", err) 91 | } else { 92 | f.Dispose() 93 | } 94 | 95 | //s.DumpStatistics(logPrinter{}) 96 | 97 | // Now f3 is youngest, then f4 (f1 and f2 are gone) 98 | addData(t, s, 500).Dispose() 99 | 100 | key = f4.Key() 101 | if _, err := s.Get(&key); err != ErrNotFound { 102 | t.Fatalf("f4 should have been removed. err:%v", err) 103 | } 104 | key = f3.Key() 105 | if _, err := s.Get(&key); err != nil { 106 | t.Fatalf("f3 should be stored. 
err:%v", err) 107 | } 108 | 109 | { 110 | defer func() { 111 | if v := recover(); v == ErrNotEnoughSpace { 112 | t.Logf("Expectedly recovered from: %v", v) 113 | } else { 114 | t.Fatalf("Expected to recover from something other than: %v", v) 115 | } 116 | }() 117 | addData(t, s, 1010) 118 | } 119 | } 120 | 121 | func TestCompression(t *testing.T) { 122 | s := NewRamStorage(1000000) 123 | f1 := addData(t, s, 1000001) 124 | defer f1.Dispose() 125 | iter := f1.Chunks() 126 | defer iter.Dispose() 127 | t.Log("Iterating over chunks...") 128 | for iter.Next() { 129 | t.Logf("Chunk: Key %v, size %v", iter.Key(), iter.Size()) 130 | } 131 | } 132 | 133 | func TestCompression2(t *testing.T) { 134 | s := NewRamStorage(1000000) 135 | temp := s.Create("Adding cyclic random data") 136 | defer temp.Dispose() 137 | cycle := 65536 138 | times := 24 139 | r := rand.New(rand.NewSource(0)) 140 | data := make([]byte, cycle) 141 | for i := 0; i < cycle; i++ { 142 | data[i] = byte(r.Int()) 143 | } 144 | t.Logf("data=%016x...", data[:8]) 145 | for i := 0; i < times; i++ { 146 | if _, err := temp.Write(data); err != nil { 147 | t.Errorf("Error on Write: %v", err) 148 | } 149 | } 150 | if err := temp.Close(); err != nil { 151 | t.Errorf("Error on Close: %v", err) 152 | } 153 | 154 | f := temp.File() 155 | defer f.Dispose() 156 | w := f.Open() 157 | data2 := make([]byte, 1) 158 | for i := 0; i < times*cycle; i++ { 159 | if n, err := io.ReadFull(w, data2); err != nil || n != 1 { 160 | t.Fatalf("Error on Read: %v (n=%d)", err, n) 161 | } 162 | if data2[0] != data[i%cycle] { 163 | t.Fatalf("Data read != data written on byte %d: %02x != %02x", i, data2[0], data[i%cycle]) 164 | } 165 | } 166 | } 167 | 168 | func TestRefCounting(t *testing.T) { 169 | _s := NewRamStorage(80 * 1024) 170 | //s := _s.(*ramStorage) 171 | _f := addRandomData(t, _s, 60*1024) 172 | f := _f.(*ramFile) 173 | //defer s.DumpStatistics(logPrinter{}) 174 | if f.entry.refs != 1 { 175 | t.Fatalf("Refs != 1 before dispose: %v", f.entry.refs) 176 | } 177 | _f.Dispose() 178 | if f.entry.refs != 0 { 179 | t.Fatalf("Refs != 0 after dispose: %v", f.entry.refs) 180 | } 181 | // This has to push out many chunks of first file 182 | addRandomData(t, _s, 70*1024) 183 | } 184 | 185 | func addData(t *testing.T, s FileStorage, size int) File { 186 | temp := s.Create(fmt.Sprintf("Adding %v bytes object", size)) 187 | defer temp.Dispose() 188 | for size > 0 { 189 | if _, err := temp.Write([]byte{byte(size)}); err != nil { 190 | panic(err) 191 | } 192 | size-- 193 | } 194 | if err := temp.Close(); err != nil { 195 | panic(err) 196 | } 197 | return temp.File() 198 | } 199 | 200 | func addRandomData(t *testing.T, s FileStorage, size int) File { 201 | temp := s.Create(fmt.Sprintf("%v random bytes", size)) 202 | defer temp.Dispose() 203 | buf := make([]byte, size) 204 | for i, _ := range buf { 205 | buf[i] = byte(rand.Int()) 206 | } 207 | if _, err := temp.Write(buf); err != nil { 208 | panic(err) 209 | } 210 | if err := temp.Close(); err != nil { 211 | panic(err) 212 | } 213 | return temp.File() 214 | } 215 | -------------------------------------------------------------------------------- /remotesync/httpsync/cmd/synctest/synctest.go: -------------------------------------------------------------------------------- 1 | // BitWrk - A Bitcoin-friendly, anonymous marketplace for computing power 2 | // Copyright (C) 2013-2019 Jonas Eschenburg 3 | // 4 | // This program is free software: you can redistribute it and/or modify 5 | // it under the terms of the GNU General Public 
License as published by 6 | // the Free Software Foundation, either version 3 of the License, or 7 | // (at your option) any later version. 8 | // 9 | // This program is distributed in the hope that it will be useful, 10 | // but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | // GNU General Public License for more details. 13 | // 14 | // You should have received a copy of the GNU General Public License 15 | // along with this program. If not, see .package main 16 | 17 | package main 18 | 19 | import ( 20 | "context" 21 | "flag" 22 | "fmt" 23 | "github.com/indyjo/cafs" 24 | "github.com/indyjo/cafs/ram" 25 | "github.com/indyjo/cafs/remotesync" 26 | "github.com/indyjo/cafs/remotesync/httpsync" 27 | "io" 28 | "log" 29 | "math/rand" 30 | "net/http" 31 | "os" 32 | "runtime/pprof" 33 | ) 34 | 35 | var storage cafs.FileStorage = ram.NewRamStorage(1 << 30) 36 | var fileHandlers = make(map[string]*httpsync.FileHandler) 37 | 38 | func main() { 39 | addr := ":8080" 40 | flag.StringVar(&addr, "l", addr, "which port to listen to") 41 | 42 | preload := "" 43 | flag.StringVar(&preload, "i", preload, "input file to load") 44 | 45 | flag.BoolVar(&remotesync.LoggingEnabled, "enable-remotesync-logging", remotesync.LoggingEnabled, 46 | "enables detailed logging from the remotesync algorithm") 47 | 48 | flag.Parse() 49 | 50 | if preload != "" { 51 | if err := loadFile(storage, preload); err != nil { 52 | log.Fatalf("Error loading '[%v]: %v", preload, err) 53 | } 54 | } 55 | 56 | http.HandleFunc("/load", handleLoad) 57 | http.HandleFunc("/sync", handleSyncFrom) 58 | http.HandleFunc("/stackdump", func(w http.ResponseWriter, r *http.Request) { 59 | name := r.FormValue("name") 60 | if len(name) == 0 { 61 | name = "goroutine" 62 | } 63 | profile := pprof.Lookup(name) 64 | if profile == nil { 65 | _, _ = w.Write([]byte("No such profile")) 66 | return 67 | } 68 | err := profile.WriteTo(w, 1) 69 | if err != nil { 70 | log.Printf("Error in profile.WriteTo: %v\n", err) 71 | } 72 | }) 73 | 74 | err := http.ListenAndServe(addr, nil) 75 | if err != nil { 76 | log.Fatalf("Error in ListenAndServe: %v", err) 77 | } 78 | } 79 | 80 | func loadFile(storage cafs.FileStorage, path string) (err error) { 81 | f, err := os.Open(path) 82 | if err != nil { 83 | return 84 | } 85 | 86 | tmp := storage.Create(path) 87 | defer tmp.Dispose() 88 | n, err := io.Copy(tmp, f) 89 | if err != nil { 90 | return fmt.Errorf("error after copying %v bytes: %v", n, err) 91 | } 92 | 93 | err = tmp.Close() 94 | if err != nil { 95 | return 96 | } 97 | 98 | file := tmp.File() 99 | defer file.Dispose() 100 | log.Printf("Read file: %v (%v bytes, chunked: %v, %v chunks)", path, n, file.IsChunked(), file.NumChunks()) 101 | 102 | printer := log.New(os.Stderr, "", log.LstdFlags) 103 | handler := httpsync.NewFileHandlerFromFile(file, rand.Perm(256)).WithPrinter(printer) 104 | fileHandlers[file.Key().String()] = handler 105 | 106 | path = fmt.Sprintf("/file/%v", file.Key().String()[:16]) 107 | http.Handle(path, handler) 108 | log.Printf(" serving under %v", path) 109 | return 110 | } 111 | 112 | func handleLoad(w http.ResponseWriter, r *http.Request) { 113 | if r.Method != "POST" { 114 | http.Error(w, http.StatusText(http.StatusMethodNotAllowed), http.StatusMethodNotAllowed) 115 | return 116 | } 117 | path := r.FormValue("path") 118 | if err := loadFile(storage, path); err != nil { 119 | http.Error(w, err.Error(), http.StatusInternalServerError) 120 | } 121 | } 122 | 123 | func 
handleSyncFrom(w http.ResponseWriter, r *http.Request) { 124 | if r.Method != "POST" { 125 | http.Error(w, http.StatusText(http.StatusMethodNotAllowed), http.StatusMethodNotAllowed) 126 | return 127 | } 128 | source := r.FormValue("source") 129 | if err := syncFile(storage, source); err != nil { 130 | http.Error(w, err.Error(), http.StatusInternalServerError) 131 | } 132 | } 133 | 134 | func syncFile(fileStorage cafs.FileStorage, source string) error { 135 | log.Printf("Sync from %v", source) 136 | if file, err := httpsync.SyncFrom(context.Background(), fileStorage, http.DefaultClient, source, "synced from "+source); err != nil { 137 | return err 138 | } else { 139 | log.Printf("Successfully received %v (%v bytes)", file.Key(), file.Size()) 140 | file.Dispose() 141 | } 142 | return nil 143 | } 144 | -------------------------------------------------------------------------------- /remotesync/httpsync/httpsync.go: -------------------------------------------------------------------------------- 1 | // BitWrk - A Bitcoin-friendly, anonymous marketplace for computing power 2 | // Copyright (C) 2013-2019 Jonas Eschenburg 3 | // 4 | // This program is free software: you can redistribute it and/or modify 5 | // it under the terms of the GNU General Public License as published by 6 | // the Free Software Foundation, either version 3 of the License, or 7 | // (at your option) any later version. 8 | // 9 | // This program is distributed in the hope that it will be useful, 10 | // but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | // GNU General Public License for more details. 13 | // 14 | // You should have received a copy of the GNU General Public License 15 | // along with this program. If not, see . 16 | 17 | // Package httpsync implements methods for requesting and serving files via CAFS 18 | package httpsync 19 | 20 | import ( 21 | "bufio" 22 | "context" 23 | "encoding/json" 24 | "fmt" 25 | "github.com/indyjo/cafs" 26 | "github.com/indyjo/cafs/remotesync" 27 | "github.com/indyjo/cafs/remotesync/shuffle" 28 | "io" 29 | "io/ioutil" 30 | "net/http" 31 | "sync" 32 | "time" 33 | ) 34 | 35 | // Struct FileHandler implements the http.Handler interface and serves a file over HTTP. 36 | // The protocol used matches with function SyncFrom. 37 | // Create using the New... functions. 38 | type FileHandler struct { 39 | m sync.Mutex 40 | source chunksSource 41 | syncinfo *remotesync.SyncInfo 42 | log cafs.Printer 43 | } 44 | 45 | // It is the owner's responsibility to correctly dispose of FileHandler instances. 46 | func (handler *FileHandler) Dispose() { 47 | handler.m.Lock() 48 | s := handler.source 49 | handler.source = nil 50 | handler.syncinfo = nil 51 | handler.m.Unlock() 52 | if s != nil { 53 | s.Dispose() 54 | } 55 | } 56 | 57 | // Function NewFileHandlerFromFile creates a FileHandler that serves chunks of a File. 58 | func NewFileHandlerFromFile(file cafs.File, perm shuffle.Permutation) *FileHandler { 59 | result := &FileHandler{ 60 | m: sync.Mutex{}, 61 | source: fileBasedChunksSource{file: file.Duplicate()}, 62 | syncinfo: &remotesync.SyncInfo{Perm: perm}, 63 | log: cafs.NewWriterPrinter(ioutil.Discard), 64 | } 65 | result.syncinfo.SetChunksFromFile(file) 66 | return result 67 | } 68 | 69 | // Function NewFileHandlerFromSyncInfo creates a FileHandler that serves chunks as 70 | // specified in a FileInfo. 
It doesn't necessarily require all of the chunks to be present 71 | // and will block waiting for a missing chunk to become available. 72 | // As a specialty, a FileHander created using this function needs not be disposed. 73 | func NewFileHandlerFromSyncInfo(syncinfo *remotesync.SyncInfo, storage cafs.FileStorage) *FileHandler { 74 | result := &FileHandler{ 75 | m: sync.Mutex{}, 76 | source: syncInfoChunksSource{ 77 | syncinfo: syncinfo, 78 | storage: storage, 79 | }, 80 | syncinfo: syncinfo, 81 | log: cafs.NewWriterPrinter(ioutil.Discard), 82 | } 83 | return result 84 | } 85 | 86 | // Sets the FileHandler's log Printer. 87 | func (handler *FileHandler) WithPrinter(printer cafs.Printer) *FileHandler { 88 | handler.log = printer 89 | return handler 90 | } 91 | 92 | func (handler *FileHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) { 93 | if r.Method == http.MethodGet { 94 | if err := json.NewEncoder(w).Encode(handler.syncinfo); err != nil { 95 | handler.log.Printf("Error serving SyncInfo: R%v", err) 96 | } 97 | return 98 | } else if r.Method != http.MethodPost { 99 | http.Error(w, http.StatusText(http.StatusMethodNotAllowed), http.StatusMethodNotAllowed) 100 | return 101 | } 102 | 103 | // Require a Connection: close header that will trick Go's HTTP server into allowing bi-directional streams. 104 | if r.Header.Get("Connection") != "close" { 105 | http.Error(w, "Connection: close required", http.StatusBadRequest) 106 | return 107 | } 108 | 109 | chunks, err := handler.source.GetChunks() 110 | if err != nil { 111 | handler.log.Printf("GetChunks() failed: %v", err) 112 | http.Error(w, err.Error(), http.StatusInternalServerError) 113 | return 114 | } 115 | defer chunks.Dispose() 116 | 117 | w.WriteHeader(http.StatusOK) 118 | w.(http.Flusher).Flush() 119 | 120 | var bytesSkipped, bytesTransferred int64 121 | cb := func(toTransfer, transferred int64) { 122 | bytesSkipped = -toTransfer 123 | bytesTransferred = transferred 124 | } 125 | handler.log.Printf("Calling WriteChunkData") 126 | start := time.Now() 127 | err = remotesync.WriteChunkData(chunks, 0, bufio.NewReader(r.Body), handler.syncinfo.Perm, 128 | remotesync.SimpleFlushWriter{w, w.(http.Flusher)}, cb) 129 | duration := time.Since(start) 130 | speed := float64(bytesTransferred) / duration.Seconds() 131 | handler.log.Printf("WriteChunkData took %v. KBytes transferred: %v (%.2f/s) skipped: %v", 132 | duration, bytesTransferred>>10, speed/1024, bytesSkipped>>10) 133 | if err != nil { 134 | handler.log.Printf("Error in WriteChunkData: %v", err) 135 | return 136 | } 137 | } 138 | 139 | // Function SyncFrom uses an HTTP client to connect to some URL and download a fie into the 140 | // given FileStorage. 
141 | func SyncFrom(ctx context.Context, storage cafs.FileStorage, client *http.Client, url, info string) (file cafs.File, err error) { 142 | // Fetch SyncInfo from remote 143 | resp, err := client.Get(url) 144 | if err != nil { 145 | return 146 | } 147 | if resp.StatusCode != http.StatusOK { 148 | return nil, fmt.Errorf("GET returned status %v", resp.Status) 149 | } 150 | var syncinfo remotesync.SyncInfo 151 | err = json.NewDecoder(resp.Body).Decode(&syncinfo) 152 | if err != nil { 153 | return 154 | } 155 | 156 | // Create Builder and establish a bidirectional POST connection 157 | builder := remotesync.NewBuilder(storage, &syncinfo, 32, info) 158 | defer builder.Dispose() 159 | 160 | pr, pw := io.Pipe() 161 | req, err := http.NewRequest(http.MethodPost, url, pr) 162 | if err != nil { 163 | return 164 | } 165 | 166 | // Enable cancelation 167 | req = req.WithContext(ctx) 168 | 169 | // Trick Go's HTTP server implementation into allowing bi-directional data flow 170 | req.Header.Set("Connection", "close") 171 | 172 | go func() { 173 | if err := builder.WriteWishList(remotesync.NopFlushWriter{pw}); err != nil { 174 | _ = pw.CloseWithError(fmt.Errorf("error in WriteWishList: %v", err)) 175 | return 176 | } 177 | _ = pw.Close() 178 | }() 179 | 180 | res, err := client.Do(req) 181 | if err != nil { 182 | return 183 | } 184 | file, err = builder.ReconstructFileFromRequestedChunks(res.Body) 185 | return 186 | } 187 | -------------------------------------------------------------------------------- /remotesync/httpsync/util.go: -------------------------------------------------------------------------------- 1 | // BitWrk - A Bitcoin-friendly, anonymous marketplace for computing power 2 | // Copyright (C) 2013-2019 Jonas Eschenburg 3 | // 4 | // This program is free software: you can redistribute it and/or modify 5 | // it under the terms of the GNU General Public License as published by 6 | // the Free Software Foundation, either version 3 of the License, or 7 | // (at your option) any later version. 8 | // 9 | // This program is distributed in the hope that it will be useful, 10 | // but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | // GNU General Public License for more details. 13 | // 14 | // You should have received a copy of the GNU General Public License 15 | // along with this program. If not, see .package main 16 | 17 | package httpsync 18 | 19 | import ( 20 | "github.com/indyjo/cafs" 21 | "github.com/indyjo/cafs/remotesync" 22 | "io" 23 | "sync" 24 | "time" 25 | ) 26 | 27 | // Interface chunksSource specifies a factory for Chunks 28 | type chunksSource interface { 29 | GetChunks() (remotesync.Chunks, error) 30 | Dispose() 31 | } 32 | 33 | // struct fileBasedChunksSource implements ChunksSource using a File. 34 | type fileBasedChunksSource struct { 35 | m sync.Mutex 36 | file cafs.File 37 | } 38 | 39 | func (f fileBasedChunksSource) GetChunks() (remotesync.Chunks, error) { 40 | f.m.Lock() 41 | file := f.file 42 | f.m.Unlock() 43 | if file == nil { 44 | return nil, remotesync.ErrDisposed 45 | } 46 | return remotesync.ChunksOfFile(file), nil 47 | } 48 | 49 | func (f fileBasedChunksSource) Dispose() { 50 | f.m.Lock() 51 | file := f.file 52 | f.file = nil 53 | f.m.Unlock() 54 | if file != nil { 55 | file.Dispose() 56 | } 57 | } 58 | 59 | // Struct syncInfoChunksSource implements ChunksSource using only a SyncInfo object. 
60 | // It requests chunks from a FileStore and waits until that chunk becomes available. 61 | // There is no guarantee that chunks are kept or will actually become available at some 62 | // time. 63 | type syncInfoChunksSource struct { 64 | syncinfo *remotesync.SyncInfo 65 | storage cafs.FileStorage 66 | } 67 | 68 | func (s syncInfoChunksSource) GetChunks() (remotesync.Chunks, error) { 69 | return &syncInfoChunks{ 70 | chunks: s.syncinfo.Chunks, 71 | storage: s.storage, 72 | done: make(chan struct{}), 73 | }, nil 74 | } 75 | 76 | func (s syncInfoChunksSource) Dispose() { 77 | } 78 | 79 | // Struct syncInfoChunks implements the Chunks interface and does the actual waiting. 80 | type syncInfoChunks struct { 81 | chunks []remotesync.ChunkInfo 82 | storage cafs.FileStorage 83 | done chan struct{} 84 | } 85 | 86 | func (s *syncInfoChunks) NextChunk() (cafs.File, error) { 87 | if len(s.chunks) == 0 { 88 | return nil, io.EOF 89 | } 90 | key := s.chunks[0].Key 91 | s.chunks = s.chunks[1:] 92 | ticker := time.NewTicker(100 * time.Millisecond) 93 | defer func() { 94 | ticker.Stop() 95 | }() 96 | for { 97 | if f, err := s.storage.Get(&key); err == nil { 98 | return f, nil 99 | } else if err != cafs.ErrNotFound { 100 | return nil, err 101 | } 102 | 103 | select { 104 | case <-s.done: 105 | return nil, remotesync.ErrDisposed 106 | case <-ticker.C: 107 | // next try 108 | } 109 | } 110 | } 111 | 112 | func (s *syncInfoChunks) Dispose() { 113 | close(s.done) 114 | } 115 | -------------------------------------------------------------------------------- /remotesync/receive.go: -------------------------------------------------------------------------------- 1 | // BitWrk - A Bitcoin-friendly, anonymous marketplace for computing power 2 | // Copyright (C) 2013-2019 Jonas Eschenburg 3 | // 4 | // This program is free software: you can redistribute it and/or modify 5 | // it under the terms of the GNU General Public License as published by 6 | // the Free Software Foundation, either version 3 of the License, or 7 | // (at your option) any later version. 8 | // 9 | // This program is distributed in the hope that it will be useful, 10 | // but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | // GNU General Public License for more details. 13 | // 14 | // You should have received a copy of the GNU General Public License 15 | // along with this program. If not, see . 16 | 17 | package remotesync 18 | 19 | import ( 20 | "bufio" 21 | "errors" 22 | "fmt" 23 | "github.com/indyjo/cafs" 24 | "github.com/indyjo/cafs/remotesync/shuffle" 25 | "io" 26 | "log" 27 | "sync" 28 | ) 29 | 30 | var ErrDisposed = errors.New("disposed") 31 | var ErrUnexpectedChunk = errors.New("unexpected chunk") 32 | 33 | // Used by receiver to memorize information about a chunk in the time window between 34 | // putting it into the wishlist and receiving the actual chunk data. 35 | type memo struct { 36 | ci ChunkInfo // key and length 37 | file cafs.File // A File if the chunk existed already, nil otherwise 38 | requested bool // Whether the chunk was requested from the sender 39 | } 40 | 41 | // Type Builder contains state needed for the duration of a file transmission. 42 | type Builder struct { 43 | done chan struct{} 44 | storage cafs.FileStorage 45 | memos chan memo 46 | info string 47 | syncinf *SyncInfo 48 | 49 | mutex sync.Mutex // Guards subsequent variables 50 | disposed bool // Set in Dispose 51 | started bool // Set in WriteWishList. 
Signals that chunks channel will be used. 52 | } 53 | 54 | // Returns a new Builder for reconstructing a file. Must eventually be disposed. 55 | // The builder can then proceed sending a "wishlist" of chunks that are missing 56 | // in the local storage for complete reconstruction of the file. 57 | func NewBuilder(storage cafs.FileStorage, syncinf *SyncInfo, windowSize int, info string) *Builder { 58 | return &Builder{ 59 | done: make(chan struct{}), 60 | storage: storage, 61 | memos: make(chan memo, windowSize), 62 | info: info, 63 | syncinf: syncinf, 64 | } 65 | } 66 | 67 | // Disposes the Builder. Must be called exactly once per Builder. May cause the goroutines running 68 | // WriteWishList and ReconstructFileFromRequestedChunks to terminate with error ErrDisposed. 69 | func (b *Builder) Dispose() { 70 | b.mutex.Lock() 71 | if b.disposed { 72 | panic("Builder must be disposed exactly once") 73 | } 74 | b.disposed = true 75 | started := b.started 76 | b.mutex.Unlock() 77 | 78 | close(b.done) 79 | 80 | if started { 81 | for chunk := range b.memos { 82 | if chunk.file != nil { 83 | chunk.file.Dispose() 84 | } 85 | } 86 | } 87 | } 88 | 89 | // Outputs a bit stream with '1' for each missing chunk, and 90 | // '0' for each chunk that is already available or already requested. 91 | func (b *Builder) WriteWishList(w FlushWriter) error { 92 | if LoggingEnabled { 93 | log.Printf("Receiver: Begin WriteWishList") 94 | defer log.Printf("Receiver: End WriteWishList") 95 | } 96 | 97 | if err := b.start(); err != nil { 98 | return err 99 | } 100 | 101 | defer close(b.memos) 102 | 103 | requested := make(map[cafs.SKey]bool) 104 | bitWriter := newBitWriter(w) 105 | 106 | consumeFunc := func(v interface{}) error { 107 | ci := v.(ChunkInfo) 108 | key := ci.Key 109 | 110 | mem := memo{ 111 | ci: ci, 112 | } 113 | 114 | if key == emptyKey || requested[key] { 115 | // This key was already requested. Also, the empty key is never requested. 116 | mem.requested = false 117 | } else if file, err := b.storage.Get(&key); err != nil { 118 | // File was not found in storage -> request and remember 119 | mem.requested = true 120 | requested[key] = true 121 | } else { 122 | // File was already in storage -> prevent it from being collected until it is needed 123 | mem.file = file 124 | mem.requested = false 125 | requested[key] = true 126 | } 127 | 128 | // Write memo into channel. This might block if channel buffer is full. 129 | // Only wait until disposed. 130 | select { 131 | case b.memos <- mem: 132 | // Responsibility for disposing chunk.file is passed to the channel 133 | case <-b.done: 134 | if mem.file != nil { 135 | mem.file.Dispose() 136 | } 137 | return ErrDisposed 138 | } 139 | 140 | if err := bitWriter.WriteBit(mem.requested); err != nil { 141 | return err 142 | } 143 | 144 | return nil // success 145 | } 146 | 147 | // Create a shuffler using the above consumeFunc and push the SyncInfo's chunk infos through it. 148 | // For every ChunkInfo leaving the shuffler (in shuffled order), the consumeFunc 149 | // writes a bit into the wishlist. 
150 | shuffler := shuffle.NewStreamShuffler(b.syncinf.Perm, emptyChunkInfo, consumeFunc) 151 | nChunks := len(b.syncinf.Chunks) 152 | for idx := 0; idx < nChunks; idx++ { 153 | if err := shuffler.Put(b.syncinf.Chunks[idx]); err != nil { 154 | return fmt.Errorf("error from shuffler.Put: %v", err) 155 | } 156 | } 157 | if err := shuffler.End(); err != nil { 158 | return fmt.Errorf("error from shuffler.End: %v", err) 159 | } 160 | return bitWriter.Flush() 161 | } 162 | 163 | // Function start is called by WriteWishList to mark the Builder as started. 164 | // This has consequences for the Dispose method. 165 | func (b *Builder) start() error { 166 | b.mutex.Lock() 167 | defer b.mutex.Unlock() 168 | if b.disposed { 169 | return ErrDisposed 170 | } 171 | if b.started { 172 | panic("WriteWishList called twice") 173 | } 174 | b.started = true 175 | return nil 176 | } 177 | 178 | var placeholder interface{} = struct{}{} 179 | var zeroMemo = memo{} 180 | 181 | // Reads a sequence of length-prefixed data chunks and tries to reconstruct a file from that 182 | // information. 183 | func (b *Builder) ReconstructFileFromRequestedChunks(_r io.Reader) (cafs.File, error) { 184 | if LoggingEnabled { 185 | log.Printf("Receiver: Begin ReconstructFileFromRequestedChunks") 186 | defer log.Printf("Receiver: End ReconstructFileFromRequestedChunks") 187 | } 188 | 189 | temp := b.storage.Create(b.info) 190 | defer temp.Dispose() 191 | 192 | r := bufio.NewReader(_r) 193 | 194 | errDone := errors.New("done") 195 | 196 | unshuffler := shuffle.NewInverseStreamShuffler(b.syncinf.Perm, placeholder, func(v interface{}) error { 197 | chunk := v.(cafs.File) 198 | // Write a chunk of the work file 199 | err := appendChunk(temp, chunk) 200 | chunk.Dispose() 201 | return err 202 | }) 203 | 204 | // Make sure all chunks in the unshuffler are disposed in the end 205 | defer unshuffler.WithFunc(func(v interface{}) error { 206 | v.(cafs.File).Dispose() 207 | return nil 208 | }).End() 209 | 210 | idx := 0 211 | iteration := func() error { 212 | var mem memo 213 | 214 | // Wait until either a chunk info can be read from the channel, or the builder 215 | // has been disposed. 216 | select { 217 | case <-b.done: 218 | return ErrDisposed 219 | case mem = <-b.memos: 220 | // successfully read, continue... 221 | } 222 | 223 | // It is our responsibility to dispose the file. 224 | if mem.file != nil { 225 | defer mem.file.Dispose() 226 | } 227 | 228 | if mem.ci == emptyChunkInfo { 229 | return unshuffler.Put(placeholder) 230 | } 231 | 232 | // Under the following circumstances, read chunk data from the stream. 233 | // - chunk data was requested 234 | // - the chunk memo stream has ended (to check whether the chunk data stream also ends). 235 | // If there was a real error, abort. 236 | if mem.requested || mem == zeroMemo { 237 | chunkFile, err := readChunk(b.storage, r, fmt.Sprintf("%v #%d", b.info, idx)) 238 | if chunkFile != nil { 239 | defer chunkFile.Dispose() 240 | } 241 | if err == io.EOF && mem == zeroMemo { 242 | return errDone 243 | } else if err == io.EOF { 244 | return io.ErrUnexpectedEOF 245 | } else if err != nil { 246 | return err 247 | } else if mem == zeroMemo { 248 | return fmt.Errorf("unsolicited chunk data") 249 | } else if chunkFile.Key() != mem.ci.Key { 250 | return ErrUnexpectedChunk 251 | } else if chunkFile.Size() != int64(mem.ci.Size) { 252 | return ErrUnexpectedChunk 253 | } 254 | } 255 | 256 | // Retrieve the chunk from CAFS (we can expect to find it) 257 | chunk, _ := b.storage.Get(&mem.ci.Key) 258 | // ... 
and dispatch it to the unshuffler, where it will be buffered for a while. 259 | // Disposing is done by the unshuffler's ConsumeFunc. 260 | if LoggingEnabled { 261 | log.Printf("Receiver: unshuffler.Put(total:%v, %v)", chunk.Size(), chunk.Key()) 262 | } 263 | return unshuffler.Put(chunk) 264 | } 265 | 266 | for { 267 | if err := iteration(); err == errDone { 268 | break 269 | } else if err != nil { 270 | return nil, err 271 | } 272 | idx++ 273 | } 274 | 275 | if err := unshuffler.End(); err != nil { 276 | return nil, err 277 | } 278 | 279 | if err := temp.Close(); err != nil { 280 | return nil, err 281 | } 282 | 283 | return temp.File(), nil 284 | } 285 | 286 | // Function appendChunk appends data of `chunk` to `temp`. 287 | func appendChunk(temp io.Writer, chunk cafs.File) error { 288 | if LoggingEnabled { 289 | log.Printf("Receiver: appendChunk(total:%v, %v)", chunk.Size(), chunk.Key()) 290 | } 291 | r := chunk.Open() 292 | //noinspection GoUnhandledErrorResult 293 | defer r.Close() 294 | if _, err := io.Copy(temp, r); err != nil { 295 | return err 296 | } 297 | return nil 298 | } 299 | -------------------------------------------------------------------------------- /remotesync/remotesync.go: -------------------------------------------------------------------------------- 1 | // BitWrk - A Bitcoin-friendly, anonymous marketplace for computing power 2 | // Copyright (C) 2013-2018 Jonas Eschenburg 3 | // 4 | // This program is free software: you can redistribute it and/or modify 5 | // it under the terms of the GNU General Public License as published by 6 | // the Free Software Foundation, either version 3 of the License, or 7 | // (at your option) any later version. 8 | // 9 | // This program is distributed in the hope that it will be useful, 10 | // but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | // GNU General Public License for more details. 13 | // 14 | // You should have received a copy of the GNU General Public License 15 | // along with this program. If not, see . 16 | 17 | // Package remotesync implements a differential file synching mechanism based on the content-based chunking 18 | // that is used by CAFS internally. 19 | // Step 1: Sender and receiver agree on hashes of the file's chunks 20 | // Step 2: Receiver streams missing chunks (one bit per chunk) 21 | // Step 3: Sender responds by sending content of requested chunks 22 | package remotesync 23 | 24 | var LoggingEnabled = false 25 | -------------------------------------------------------------------------------- /remotesync/remotesync_test.go: -------------------------------------------------------------------------------- 1 | package remotesync 2 | 3 | import ( 4 | "bufio" 5 | "fmt" 6 | "github.com/indyjo/cafs" 7 | . "github.com/indyjo/cafs/ram" 8 | "github.com/indyjo/cafs/remotesync/shuffle" 9 | "io" 10 | "math/rand" 11 | "testing" 12 | ) 13 | 14 | // This is a regression test that deadlocks as long as indyjo/bitwrk#152 isn't solved. 
15 | // https://github.com/indyjo/bitwrk/issues/152 16 | func TestDispose(t *testing.T) { 17 | store := NewRamStorage(256 * 1024) 18 | syncinfo := &SyncInfo{} 19 | syncinfo.SetPermutation(rand.Perm(10)) 20 | builder := NewBuilder(store, syncinfo, 8, "Test file") 21 | // Dispose builder before call to WriteWishList 22 | builder.Dispose() 23 | } 24 | 25 | func TestRemoteSync(t *testing.T) { 26 | // Re-use stores to test for leaks on the fly 27 | storeA := NewRamStorage(8 * 1024 * 1024) 28 | storeB := NewRamStorage(8 * 1024 * 1024) 29 | // LoggingEnabled = true 30 | 31 | // Test for different amounts of overlapping data 32 | for _, p := range []float64{0, 0.01, 0.25, 0.5, 0.75, 0.99, 1} { 33 | // Test for different number of blocks, so that storeB will _almost_ be filled up. 34 | // We can't test up to 512 because we don't know how much overhead data was produced 35 | // by the chunking algorithm (yes, RAM storage counts that overhead!) 36 | for _, nBlocks := range []int{0, 1, 2, 4, 8, 16, 32, 64, 128, 256, 400} { 37 | sigma := 0.25 38 | if nBlocks > 256 { 39 | sigma = 0 40 | } 41 | // Test for different permutation sizes 42 | for _, permSize := range []int{1, 2, 3, 5, 10, 100, 1000} { 43 | perm := shuffle.Permutation(rand.Perm(permSize)) 44 | func() { 45 | defer reportUsage(t, "B", storeB) 46 | defer reportUsage(t, "A", storeA) 47 | testWithParams(t, storeA, storeB, p, sigma, nBlocks, perm) 48 | }() 49 | } 50 | } 51 | } 52 | } 53 | 54 | func check(t *testing.T, msg string, err error) { 55 | if err != nil { 56 | t.Fatalf("Error %v: %v", msg, err) 57 | } 58 | } 59 | 60 | func testWithParams(t *testing.T, storeA, storeB cafs.BoundedStorage, p, sigma float64, nBlocks int, perm shuffle.Permutation) { 61 | t.Logf("Testing with params: p=%f, nBlocks=%d, permSize=%d", p, nBlocks, len(perm)) 62 | tempA := storeA.Create(fmt.Sprintf("Data A(%.2f,%d)", p, nBlocks)) 63 | defer tempA.Dispose() 64 | tempB := storeB.Create(fmt.Sprintf("Data B(%.2f,%d)", p, nBlocks)) 65 | defer tempB.Dispose() 66 | 67 | check(t, "creating similar data", createSimilarData(tempA, tempB, p, sigma, 8192, nBlocks)) 68 | 69 | check(t, "closing tempA", tempA.Close()) 70 | check(t, "closing tempB", tempB.Close()) 71 | 72 | fileA := tempA.File() 73 | defer fileA.Dispose() 74 | 75 | syncinf := &SyncInfo{} 76 | syncinf.SetPermutation(perm) 77 | syncinf.SetChunksFromFile(fileA) 78 | builder := NewBuilder(storeB, syncinf, 8, fmt.Sprintf("Recovered A(%.2f,%d)", p, nBlocks)) 79 | defer builder.Dispose() 80 | 81 | // task: transfer file A to storage B 82 | // Pipe 1 is used to transfer the wishlist bit-stream from the receiver to the sender 83 | pipeReader1, pipeWriter1 := io.Pipe() 84 | // Pipe 2 is used to transfer the actual requested chunk data to the receiver 85 | pipeReader2, pipeWriter2 := io.Pipe() 86 | 87 | go func() { 88 | if err := builder.WriteWishList(NopFlushWriter{pipeWriter1}); err != nil { 89 | _ = pipeWriter1.CloseWithError(fmt.Errorf("Error generating wishlist: %v", err)) 90 | } else { 91 | _ = pipeWriter1.Close() 92 | } 93 | }() 94 | 95 | go func() { 96 | chunks := ChunksOfFile(fileA) 97 | defer chunks.Dispose() 98 | if err := WriteChunkData(chunks, fileA.Size(), bufio.NewReader(pipeReader1), perm, NopFlushWriter{pipeWriter2}, nil); err != nil { 99 | _ = pipeWriter2.CloseWithError(fmt.Errorf("Error sending requested chunk data: %v", err)) 100 | } else { 101 | _ = pipeWriter2.Close() 102 | } 103 | }() 104 | 105 | var fileB cafs.File 106 | if f, err := builder.ReconstructFileFromRequestedChunks(pipeReader2); err != nil { 
107 | t.Fatalf("Error reconstructing: %v", err) 108 | } else { 109 | fileB = f 110 | defer f.Dispose() 111 | } 112 | 113 | _ = fileB 114 | assertEqual(t, fileA.Open(), fileB.Open()) 115 | } 116 | 117 | func assertEqual(t *testing.T, a, b io.ReadCloser) { 118 | bufA := make([]byte, 1) 119 | bufB := make([]byte, 1) 120 | count := 0 121 | for { 122 | nA, errA := a.Read(bufA) 123 | nB, errB := b.Read(bufB) 124 | if nA != nB { 125 | t.Fatal("Chunks differ in total") 126 | } 127 | if errA != errB { 128 | t.Fatalf("Error a:%v b:%v", errA, errB) 129 | } 130 | if bufA[0] != bufB[0] { 131 | t.Fatalf("Chunks differ in content at position %v: %02x vs %02x", count, bufA[0], bufB[0]) 132 | } 133 | if errA == io.EOF && errB == io.EOF { 134 | break 135 | } 136 | count++ 137 | } 138 | check(t, "closing file a in assertEqual", a.Close()) 139 | check(t, "closing file b in assertEqual", b.Close()) 140 | } 141 | 142 | func createSimilarData(tempA, tempB io.Writer, p, sigma, avgchunk float64, numchunks int) error { 143 | for numchunks > 0 { 144 | numchunks-- 145 | lengthA := int(avgchunk*sigma*rand.NormFloat64() + avgchunk) 146 | if lengthA < 16 { 147 | lengthA = 16 148 | } 149 | data := randomBytes(lengthA) 150 | if _, err := tempA.Write(data); err != nil { 151 | return err 152 | } 153 | same := rand.Float64() <= p 154 | if same { 155 | if _, err := tempB.Write(data); err != nil { 156 | return err 157 | } 158 | } else { 159 | lengthB := int(avgchunk*sigma*rand.NormFloat64() + avgchunk) 160 | if lengthB < 16 { 161 | lengthB = 16 162 | } 163 | data = randomBytes(lengthB) 164 | if _, err := tempB.Write(data); err != nil { 165 | return err 166 | } 167 | } 168 | } 169 | return nil 170 | } 171 | 172 | func randomBytes(length int) []byte { 173 | result := make([]byte, 0, length) 174 | for len(result) < length { 175 | result = append(result, byte(rand.Int())) 176 | } 177 | return result 178 | } 179 | 180 | type testPrinter struct { 181 | t *testing.T 182 | } 183 | 184 | func (t *testPrinter) Printf(format string, v ...interface{}) { 185 | t.t.Logf(format, v...) 186 | } 187 | 188 | func reportUsage(t *testing.T, name string, store cafs.BoundedStorage) { 189 | store.FreeCache() 190 | ui := store.GetUsageInfo() 191 | if ui.Locked != 0 { 192 | t.Errorf(" Store %v: %v", name, ui) 193 | store.DumpStatistics(&testPrinter{t}) 194 | t.FailNow() 195 | } 196 | } 197 | -------------------------------------------------------------------------------- /remotesync/send.go: -------------------------------------------------------------------------------- 1 | // BitWrk - A Bitcoin-friendly, anonymous marketplace for computing power 2 | // Copyright (C) 2013-2019 Jonas Eschenburg 3 | // 4 | // This program is free software: you can redistribute it and/or modify 5 | // it under the terms of the GNU General Public License as published by 6 | // the Free Software Foundation, either version 3 of the License, or 7 | // (at your option) any later version. 8 | // 9 | // This program is distributed in the hope that it will be useful, 10 | // but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | // GNU General Public License for more details. 13 | // 14 | // You should have received a copy of the GNU General Public License 15 | // along with this program. If not, see . 
16 | 17 | package remotesync 18 | 19 | import ( 20 | "errors" 21 | "fmt" 22 | "github.com/indyjo/cafs" 23 | "github.com/indyjo/cafs/remotesync/shuffle" 24 | "io" 25 | "log" 26 | ) 27 | 28 | // By passing a callback function to some of the transmission functions, 29 | // the caller may subscribe to the current transmission status. 30 | type TransferStatusCallback func(bytesToTransfer, bytesTransferred int64) 31 | 32 | // Interface Chunks allows iterating over any sequence of chunks. 33 | type Chunks interface { 34 | // Function NextChunk returns one of three cases: 35 | // - A File, nil (good case) 36 | // - nil, io.EOF (terminal case: end of stream) 37 | // - nil, an error (terminal case: an error occurred) 38 | // It is the caller's duty to call Dispose() on the file returned. 39 | NextChunk() (cafs.File, error) 40 | 41 | // Function Dispose must be called when this object is no longer used. 42 | Dispose() 43 | } 44 | 45 | // Function ChunksOfFile returns the chunks of a File as an implementation of the Chunks 46 | // interface. It's the caller's responsibility to call Dispose() on the returned object. 47 | func ChunksOfFile(file cafs.File) Chunks { 48 | return chunksOfFile{iter: file.Chunks()} 49 | } 50 | 51 | // Struct chunksOfFile is a minimal wrapper around a FileIterator that implements 52 | // the Chunks interface. 53 | type chunksOfFile struct { 54 | iter cafs.FileIterator 55 | } 56 | 57 | func (c chunksOfFile) NextChunk() (cafs.File, error) { 58 | if c.iter.Next() { 59 | return c.iter.File(), nil 60 | } 61 | return nil, io.EOF 62 | } 63 | 64 | func (c chunksOfFile) Dispose() { 65 | c.iter.Dispose() 66 | } 67 | 68 | // Iterates over a wishlist (read from `r` and pertaining to a permuted order of hashes), 69 | // and calls `f` for each chunk in `chunks`, requested or not. 70 | // If `f` returns an error, aborts the iteration and also returns the error. 71 | func forEachChunk(chunks Chunks, r io.ByteReader, perm shuffle.Permutation, f func(chunk cafs.File, requested bool) error) error { 72 | bits := newBitReader(r) 73 | 74 | // Prepare shuffler for iterating the file's chunks in shuffled order, matching them with 75 | // wishlist bits and calling `f` for each chunk, requested or not. 76 | shuffler := shuffle.NewStreamShuffler(perm, nil, func(v interface{}) error { 77 | var requested bool 78 | if b, err := bits.ReadBit(); err != nil { 79 | return fmt.Errorf("error reading from wishlist bitstream: %v", err) 80 | } else { 81 | requested = b 82 | } 83 | 84 | if v == nil { 85 | // This is a placeholder value generated by the shuffler. Require that the receiver 86 | // signalled not to request the corresponding chunk. 87 | if requested { 88 | return errors.New("receiver requested the empty chunk") 89 | } 90 | // otherwise, there's nothing to do 91 | return nil 92 | } 93 | 94 | // We have a chunk with a corresponding wishlist bit. Dispatch to delegate function. 95 | chunk := v.(cafs.File) 96 | err := f(chunk, requested) 97 | chunk.Dispose() 98 | return err 99 | }) 100 | 101 | // At the end of this function, we must make sure that all chunks still stored 102 | // in the shuffler are disposed of. 103 | defer func() { 104 | s := shuffler.WithFunc(func(v interface{}) error { 105 | if v != nil { 106 | v.(cafs.File).Dispose() 107 | } 108 | return nil 109 | }) 110 | _ = s.End() 111 | }() 112 | 113 | // Iterate through the chunks and put them into the shuffler.
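// NextChunk returns (nil, io.EOF) at the regular end of the sequence; any other error
// aborts the iteration below and is returned to the caller.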
114 | for { 115 | if chunk, err := chunks.NextChunk(); err == nil { 116 | if err := shuffler.Put(chunk); err != nil { 117 | return err 118 | } 119 | } else if err == io.EOF { 120 | break 121 | } else { 122 | return err 123 | } 124 | 125 | } 126 | if err := shuffler.End(); err != nil { 127 | return err 128 | } 129 | 130 | // Expect wishlist byte stream to be read completely 131 | if _, err := r.ReadByte(); err != io.EOF { 132 | return errors.New("wishlist too long") 133 | } 134 | return nil 135 | } 136 | 137 | // Writes a stream of chunk length / data pairs, permuted by a shuffler corresponding to `perm`, 138 | // into an io.Writer, based on the chunks of a file and a matching permuted wishlist of requested chunks, 139 | // read from `r`. 140 | func WriteChunkData(chunks Chunks, bytesToTransfer int64, r io.ByteReader, perm shuffle.Permutation, w FlushWriter, cb TransferStatusCallback) error { 141 | if LoggingEnabled { 142 | log.Printf("Sender: Begin WriteChunkData") 143 | defer log.Printf("Sender: End WriteChunkData") 144 | } 145 | 146 | // Determine the number of bytes to transmit by starting at the maximum and subtracting the chunk 147 | // size whenever we read a 0 (chunk not requested) 148 | if cb != nil { 149 | cb(bytesToTransfer, 0) 150 | } 151 | 152 | // Iterate requested chunks. Write the chunk's length (as varint) and the chunk data 153 | // into the output writer. Update the number of bytes transferred on the go. 154 | var bytesTransferred int64 155 | return forEachChunk(chunks, r, perm, func(chunk cafs.File, requested bool) error { 156 | if requested { 157 | if err := writeVarint(w, chunk.Size()); err != nil { 158 | return err 159 | } 160 | r := chunk.Open() 161 | if n, err := io.Copy(w, r); err != nil { 162 | _ = r.Close() 163 | return err 164 | } else { 165 | w.Flush() 166 | bytesTransferred += n 167 | } 168 | if err := r.Close(); err != nil { 169 | return err 170 | } 171 | } else { 172 | bytesToTransfer -= chunk.Size() 173 | } 174 | if cb != nil { 175 | // Notify callback of status 176 | cb(bytesToTransfer, bytesTransferred) 177 | } 178 | return nil 179 | }) 180 | } 181 | -------------------------------------------------------------------------------- /remotesync/shuffle/shuffle.go: -------------------------------------------------------------------------------- 1 | // BitWrk - A Bitcoin-friendly, anonymous marketplace for computing power 2 | // Copyright (C) 2013-2017 Jonas Eschenburg 3 | // 4 | // This program is free software: you can redistribute it and/or modify 5 | // it under the terms of the GNU General Public License as published by 6 | // the Free Software Foundation, either version 3 of the License, or 7 | // (at your option) any later version. 8 | // 9 | // This program is distributed in the hope that it will be useful, 10 | // but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | // GNU General Public License for more details. 13 | // 14 | // You should have received a copy of the GNU General Public License 15 | // along with this program. If not, see . 16 | 17 | // Package shuffle implements an efficient algorithm for performing a 18 | // cyclic permutation on a possibly infinite stream of data elements. 19 | package shuffle 20 | 21 | import "math/rand" 22 | 23 | // Type Permutation contains a permutation of integer numbers 0..k-1, 24 | // where k is the length of the permutation cycle.
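// A Permutation can be obtained from a random source, for example via Random (below)
// or by converting the result of rand.Perm:
//
//	perm := Permutation(rand.Perm(k))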
25 | type Permutation []int 26 | 27 | // Type Shuffler implements a buffer for permuting a stream of 28 | // data elements. 29 | // 30 | // Data elements are put into a Shuffler and retrieved from it 31 | // in a different order. Internally using a buffer of size k, each 32 | // data element is retrieved from the buffer up to k-1 steps after 33 | // it is put in. A stream of data elements shuffled this way is 34 | // reversible to its original order. 35 | type Shuffler struct { 36 | perm Permutation 37 | buffer []interface{} 38 | idx int 39 | } 40 | 41 | // Interface StreamShuffler is common for shufflers and unshufflers working on a 42 | // stream with a well-defined beginning and end. 43 | type StreamShuffler interface { 44 | // Puts one data element into the StreamShuffler. Calls the ConsumeFunc exactly once. 45 | Put(interface{}) error 46 | // Feeds remaining data from the buffer into the ConsumeFunc, calling it k-1 times. 47 | End() error 48 | // Returns a shallow copy of this StreamShuffler with a different ConsumeFunc. 49 | WithFunc(consume ConsumeFunc) StreamShuffler 50 | } 51 | 52 | // Type ConsumeFunc defines a function that accepts one parameter of 53 | // arbitrary type and returns an error. 54 | type ConsumeFunc func(interface{}) error 55 | 56 | // Functions of type applyFunc are used internally to apply a value returned from 57 | // the shuffler to a ConsumeFunc. This is necessary because shuffling and unshuffling 58 | // are not fully symmetric and require the substitution or removal of placeholder values. 59 | type applyFunc func(ConsumeFunc, interface{}) error 60 | 61 | // Type streamShuffler uses a Shuffler to permute a sequence of arbitrary length. 62 | type streamShuffler struct { 63 | consume ConsumeFunc 64 | shuffler *Shuffler 65 | apply applyFunc 66 | } 67 | 68 | // Creates a random permutation of given length. 69 | func Random(size int, r *rand.Rand) Permutation { 70 | return r.Perm(size) 71 | } 72 | 73 | // Given a permutation p, creates a complementary permutation p' 74 | // such that using the output of a Shuffler based on p as the input 75 | // of a Shuffler based on p' restores the original stream order 76 | // delayed by len(p) - 1 steps. 77 | func (p Permutation) Inverse() Permutation { 78 | inv := make(Permutation, len(p)) 79 | for i, j := range p { 80 | inv[j] = (len(p) - 1 + i) % len(p) 81 | } 82 | return inv 83 | } 84 | 85 | // Creates a new Shuffler based on permutation p. 86 | func NewShuffler(p Permutation) *Shuffler { 87 | result := new(Shuffler) 88 | result.perm = p 89 | result.buffer = make([]interface{}, len(p)) 90 | return result 91 | } 92 | 93 | // Inputs a data element v into the shuffler and simultaneously 94 | // retrieves another (or, every k invocations, the same) data element. 95 | // May return nil while the buffer hasn't been completely filled. 96 | func (s *Shuffler) Put(v interface{}) interface{} { 97 | i := s.idx 98 | s.idx++ 99 | if s.idx == len(s.buffer) { 100 | s.idx = 0 101 | } 102 | s.buffer[s.perm[i]] = v 103 | return s.buffer[i] 104 | } 105 | 106 | // Returns a complementary shuffler that reverses the permutation (except 107 | // for a delay of k-1 steps). 108 | func (s *Shuffler) Inverse() *Shuffler { 109 | return NewShuffler(s.perm.Inverse()) 110 | } 111 | 112 | // Returns the length k of the permutation buffer used by the shuffler. 113 | func (s *Shuffler) Length() int { 114 | return len(s.buffer) 115 | } 116 | 117 | // Creates a StreamShuffler applying a permutation to a stream.
Argument `placeholder` 118 | // specifies a value that is inserted into the permuted stream in order to symbolize blank space. 119 | func NewStreamShuffler(p Permutation, placeholder interface{}, consume ConsumeFunc) StreamShuffler { 120 | return &streamShuffler{ 121 | consume: consume, 122 | shuffler: NewShuffler(p), 123 | apply: func(f ConsumeFunc, v interface{}) error { 124 | if v == nil { 125 | v = placeholder 126 | } 127 | return f(v) 128 | }, 129 | } 130 | } 131 | 132 | func (e *streamShuffler) Put(v interface{}) error { 133 | r := e.shuffler.Put(v) 134 | return e.apply(e.consume, r) 135 | } 136 | 137 | func (e *streamShuffler) End() error { 138 | for i := 0; i < e.shuffler.Length()-1; i++ { 139 | r := e.shuffler.Put(nil) 140 | if err := e.apply(e.consume, r); err != nil { 141 | return err 142 | } 143 | } 144 | return nil 145 | } 146 | 147 | func (e *streamShuffler) WithFunc(consume ConsumeFunc) StreamShuffler { 148 | var s streamShuffler 149 | s = *e 150 | s.consume = consume 151 | return &s 152 | } 153 | 154 | // Creates a StreamShuffler applying the inverse permutation and thereby restoring 155 | // the original stream order. Argument `placeholder` specifies blank space inserted 156 | // into the stream by the original shuffler. Values equal to `placeholder` will not 157 | // be forwarded to `consume`. 158 | func NewInverseStreamShuffler(p Permutation, placeholder interface{}, consume ConsumeFunc) StreamShuffler { 159 | return &streamShuffler{ 160 | consume: consume, 161 | shuffler: NewShuffler(p.Inverse()), 162 | apply: func(f ConsumeFunc, v interface{}) error { 163 | if v == nil || v == placeholder { 164 | return nil 165 | } 166 | return f(v) 167 | }, 168 | } 169 | } 170 | -------------------------------------------------------------------------------- /remotesync/shuffle/shuffle_test.go: -------------------------------------------------------------------------------- 1 | package shuffle 2 | 3 | import ( 4 | "math/rand" 5 | "testing" 6 | ) 7 | 8 | func TestShuffler(t *testing.T) { 9 | rgen := rand.New(rand.NewSource(1)) 10 | for _, permSize := range []int{1, 2, 3, 4, 5, 7, 10, 31, 57, 127, 512} { 11 | perm := Random(permSize, rgen) 12 | inv := perm.Inverse() 13 | for _, dataSize := range []int{1, 2, 3, 4, 5, 7, 10, 31, 57, 127, 512, 1024} { 14 | delay := len(perm) - 1 15 | 16 | for repeat := 0; repeat < 100; repeat++ { 17 | data := Random(dataSize, rgen) 18 | 19 | forward := NewShuffler(perm) 20 | inverse := NewShuffler(inv) 21 | 22 | for i := 0; i < len(data)+delay; i++ { 23 | var v interface{} 24 | if i < len(data) { 25 | v = data[i] 26 | } else { 27 | v = nil 28 | } 29 | w := inverse.Put(forward.Put(v)) 30 | if i < delay { 31 | continue 32 | } 33 | if w.(int) != data[i-delay] { 34 | t.Errorf("When testing with permutations of size %v", permSize) 35 | t.Errorf(" and data of size %v,", dataSize) 36 | t.Fatalf(" a mismatch was detected on %vth iteration: w:%v != data[%v]:%v", i, w, i-delay, data[i-delay]) 37 | } 38 | } 39 | } 40 | } 41 | } 42 | } 43 | 44 | func TestStreamShuffler(t *testing.T) { 45 | permutations := []Permutation{ 46 | {0}, 47 | {0, 1}, 48 | {1, 0}, 49 | {3, 4, 2, 1, 0}, 50 | {4, 6, 3, 1, 5, 2, 0}, 51 | {6, 7, 5, 3, 2, 1, 4, 0}, 52 | { 53 | 24, 40, 90, 31, 11, 6, 26, 54, 76, 43, 79, 92, 7, 49, 17, 32, 80, 54 | 95, 15, 86, 20, 48, 94, 5, 27, 50, 65, 58, 38, 33, 60, 87, 36, 59, 55 | 85, 55, 23, 72, 47, 53, 39, 71, 96, 74, 82, 83, 28, 97, 62, 3, 45, 21, 56 | 2, 44, 70, 1, 25, 4, 68, 10, 19, 67, 77, 81, 51, 61, 35, 91, 84, 57, 57 | 16, 64, 78, 73, 93, 34, 29, 8, 30, 
9, 66, 89, 52, 22, 18, 56, 13, 46, 58 | 69, 75, 88, 41, 42, 63, 12, 37, 14, 0, 59 | }, 60 | } 61 | strings := []string{ 62 | "", 63 | "x", 64 | "xy", 65 | "xyz", 66 | "0123456789abcde", 67 | "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ", 68 | } 69 | for _, perm := range permutations { 70 | for _, str := range strings { 71 | testWith(t, perm, str) 72 | } 73 | } 74 | } 75 | 76 | func testWith(t *testing.T, perm Permutation, original string) { 77 | shuffled := shuffleString(t, original, NewStreamShuffler(perm, '_', nil)) 78 | unshuffled := shuffleString(t, shuffled, NewInverseStreamShuffler(perm, '_', nil)) 79 | 80 | if original != unshuffled { 81 | t.Errorf("When testing with %#v (inverse: %#v):", perm, perm.Inverse()) 82 | t.Fatalf(" Shuffling and unshuffling returned %#v. Expected: %#v. Shuffled: %#v", 83 | unshuffled, original, shuffled) 84 | } 85 | } 86 | 87 | func shuffleString(t *testing.T, in string, s StreamShuffler) (out string) { 88 | f := func(v interface{}) error { 89 | out = out + string(v.(rune)) 90 | return nil 91 | } 92 | s = s.WithFunc(f) 93 | for _, c := range in { 94 | s.Put(c) 95 | } 96 | s.End() 97 | return 98 | } 99 | 100 | // Test that doing multiple simultaneous transmissions via a buffered connection to a simultated 101 | // caching receiver actually reduces the amount of data per transmission to the expected degree. 102 | func TestTransmission(t *testing.T) { 103 | const NTRANSMISSIONS = 3 104 | const BUFFER_SIZE = 1000 105 | const PERMUTATION_SIZE = 8000 106 | 107 | // Simulate a cache of already received data. The value expresses at which time step the data is received. 108 | // That's how we can simulate buffering behavior. 109 | received := make(map[int]int) 110 | // Counter of data that was actually sent 111 | sent := 0 112 | 113 | // Time step. 114 | time := 0 115 | 116 | // Create a number of shufflers that simulate parallel transmissions. 117 | transmissions := make([]StreamShuffler, NTRANSMISSIONS) 118 | 119 | r := rand.New(rand.NewSource(0)) 120 | for i := range transmissions { 121 | transmissions[i] = NewStreamShuffler(Random(PERMUTATION_SIZE, r), -1, func(v interface{}) error { 122 | if v.(int) >= 0 { 123 | // Test if data has arrived at cache yet. If not, put it in and trigger retransmission 124 | if t_received, ok := received[v.(int)]; !ok || time < t_received { 125 | if !ok { 126 | received[v.(int)] = time + BUFFER_SIZE 127 | } 128 | sent++ 129 | } 130 | } 131 | return nil 132 | }) 133 | } 134 | 135 | for i := 0; i < PERMUTATION_SIZE; i++ { 136 | for _, transmission := range transmissions { 137 | if err := transmission.Put(i); err != nil { 138 | t.Fatalf("Unexpected error: %v", err) 139 | } 140 | } 141 | time++ 142 | } 143 | 144 | // Purposefully don't call End() on transmissions because that would not simulate parallel behavior. 145 | // Instead, flood the shufflers with a dummy value in parallel. By putting PERMUTATION_SIZE 146 | // dummy values per transmission, we can guarantee the buffer was flushed. 
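	// Each shuffler's buffer has length PERMUTATION_SIZE, so putting PERMUTATION_SIZE dummy
	// values is more than enough to drain it (k-1 puts would already suffice).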
147 | for i := 0; i < PERMUTATION_SIZE; i++ { 148 | for _, transmission := range transmissions { 149 | if err := transmission.Put(-1); err != nil { 150 | t.Fatalf("Unexpected error: %v", err) 151 | } 152 | } 153 | time++ 154 | } 155 | 156 | transmission_avg := float64(sent) / float64(PERMUTATION_SIZE) 157 | t.Logf("Transmissions/data: % 5.2f", transmission_avg) 158 | 159 | // This model is probably wrong because it assumes stochastic independence of data chunk transmissions 160 | t.Logf("Expected: % 5.2f", 1+float64(NTRANSMISSIONS-1)*float64(BUFFER_SIZE)/float64(PERMUTATION_SIZE)) 161 | // TODO: Add actual test here 162 | } 163 | -------------------------------------------------------------------------------- /remotesync/syncinfo.go: -------------------------------------------------------------------------------- 1 | // BitWrk - A Bitcoin-friendly, anonymous marketplace for computing power 2 | // Copyright (C) 2013-2019 Jonas Eschenburg 3 | // 4 | // This program is free software: you can redistribute it and/or modify 5 | // it under the terms of the GNU General Public License as published by 6 | // the Free Software Foundation, either version 3 of the License, or 7 | // (at your option) any later version. 8 | // 9 | // This program is distributed in the hope that it will be useful, 10 | // but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | // GNU General Public License for more details. 13 | // 14 | // You should have received a copy of the GNU General Public License 15 | // along with this program. If not, see . 16 | 17 | package remotesync 18 | 19 | import ( 20 | "bufio" 21 | "fmt" 22 | "github.com/indyjo/cafs" 23 | "github.com/indyjo/cafs/chunking" 24 | "github.com/indyjo/cafs/remotesync/shuffle" 25 | "io" 26 | ) 27 | 28 | // Struct SyncInfo contains information which two CAFS instances have to agree on before 29 | // transmitting a file. 30 | type SyncInfo struct { 31 | Chunks []ChunkInfo // hashes and sizes of chunks 32 | Perm shuffle.Permutation // the permutation of chunks to use when transferring 33 | } 34 | 35 | // Func SetTrivialPermutation sets the permutation to the trivial permutation (the one that doesn't permute). 36 | func (s *SyncInfo) SetTrivialPermutation() { 37 | s.Perm = []int{0} 38 | } 39 | 40 | // Func SetPermutation sets the permutation to use when transferring chunks. 41 | func (s *SyncInfo) SetPermutation(perm shuffle.Permutation) { 42 | s.Perm = append(s.Perm[:0], perm...) 43 | } 44 | 45 | // Func SetChunksFromFile prepares sync information for a CAFS file. 46 | func (s *SyncInfo) SetChunksFromFile(file cafs.File) { 47 | if !file.IsChunked() { 48 | s.Chunks = append(s.Chunks[:0], ChunkInfo{ 49 | Key: file.Key(), 50 | Size: intsize(file.Size()), 51 | }) 52 | return 53 | } 54 | 55 | iter := file.Chunks() 56 | s.Chunks = s.Chunks[:0] 57 | for iter.Next() { 58 | s.addChunk(iter.Key(), iter.Size()) 59 | } 60 | iter.Dispose() 61 | } 62 | 63 | // Func ReadFromLegacyStream reads chunk hashes from a stream encoded in the format previously used. No permutation 64 | // data is sent and it is expected that the permutation remains the trivial permutation {0}.
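// The legacy format is a plain concatenation of (chunk key bytes, varint-encoded chunk size)
// pairs, terminated by the end of the stream; WriteToLegacyStream below produces this format.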
65 | func (s *SyncInfo) ReadFromLegacyStream(stream io.Reader) error { 66 | // We need ReadByte 67 | r := bufio.NewReader(stream) 68 | 69 | for { 70 | // Read a chunk hash and its size 71 | var key cafs.SKey 72 | if _, err := io.ReadFull(r, key[:]); err == io.EOF { 73 | break 74 | } else if err != nil { 75 | return fmt.Errorf("error reading chunk hash: %v", err) 76 | } 77 | var size int64 78 | if l, err := readChunkLength(r); err != nil { 79 | return fmt.Errorf("error reading size of chunk: %v", err) 80 | } else { 81 | size = l 82 | } 83 | 84 | s.addChunk(key, size) 85 | } 86 | return nil 87 | } 88 | 89 | // Func WriteToLegacyStream writes chunk hashes to a stream encoded in the format previously used. 90 | func (s *SyncInfo) WriteToLegacyStream(stream io.Writer) error { 91 | for _, ci := range s.Chunks { 92 | if _, err := stream.Write(ci.Key[:]); err != nil { 93 | return err 94 | } 95 | if err := writeVarint(stream, int64(ci.Size)); err != nil { 96 | return err 97 | } 98 | } 99 | return nil 100 | } 101 | 102 | func (s *SyncInfo) addChunk(key cafs.SKey, size int64) { 103 | s.Chunks = append(s.Chunks, ChunkInfo{key, intsize(size)}) 104 | } 105 | 106 | func intsize(size int64) int { 107 | if size < 0 || size > chunking.MaxChunkSize { 108 | panic("invalid chunk size") 109 | } 110 | return int(size) 111 | } 112 | 113 | // Applies the permutation contained within the receiver to its own list of chunks, returning 114 | // a SyncInfo with a permuted list of chunks and the trivial permutation. 115 | // This is interesting for assistive transfers where a file is offered for retrieval while it is 116 | // still being retrieved from a different source. In that case, forwarding file chunks in a 117 | // different shuffle order than the one retrieved would lead to unnecessary delays waiting for 118 | // a certain chunk while others are already available.
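// The returned SyncInfo contains the same chunks as the receiver, reordered according to
// s.Perm; placeholder slots emitted by the shuffler are skipped.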
119 | func (s *SyncInfo) Shuffle() *SyncInfo { 120 | newChunks := make([]ChunkInfo, 0, len(s.Chunks)) 121 | shuffler := shuffle.NewStreamShuffler(s.Perm, nil, func(v interface{}) error { 122 | if v != nil { 123 | newChunks = append(newChunks, v.(ChunkInfo)) 124 | } 125 | return nil 126 | }) 127 | for _, c := range s.Chunks { 128 | _ = shuffler.Put(c) 129 | } 130 | _ = shuffler.End() 131 | return &SyncInfo{ 132 | Chunks: newChunks, 133 | Perm: shuffle.Permutation{0}, 134 | } 135 | } 136 | -------------------------------------------------------------------------------- /remotesync/syncinfo_test.go: -------------------------------------------------------------------------------- 1 | package remotesync 2 | 3 | import ( 4 | "bytes" 5 | "encoding/json" 6 | "github.com/indyjo/cafs" 7 | "testing" 8 | ) 9 | 10 | func TestSyncInfoJSON(t *testing.T) { 11 | s := SyncInfo{} 12 | s.addChunk(cafs.SKey{11, 22, 33, 44, 55, 66, 77, 88}, 1337) 13 | s.addChunk(cafs.SKey{11, 22, 33, 44, 55, 66, 77, 88}, 1337) 14 | b, err := json.Marshal(s) 15 | if err != nil { 16 | t.Fatalf("Error encoding: %v", err) 17 | } 18 | // t.Logf("%v", string(b)) 19 | 20 | s2 := SyncInfo{} 21 | err = json.Unmarshal(b, &s2) 22 | if err != nil { 23 | t.Fatalf("Error decoding: %v", err) 24 | } 25 | 26 | b2, err := json.Marshal(s2) 27 | if err != nil { 28 | t.Fatalf("Error encoding: %v", err) 29 | } 30 | 31 | if !bytes.Equal(b, b2) { 32 | t.Fatalf("Encoding differs") 33 | } 34 | } 35 | -------------------------------------------------------------------------------- /remotesync/util.go: -------------------------------------------------------------------------------- 1 | // BitWrk - A Bitcoin-friendly, anonymous marketplace for computing power 2 | // Copyright (C) 2013-2018 Jonas Eschenburg 3 | // 4 | // This program is free software: you can redistribute it and/or modify 5 | // it under the terms of the GNU General Public License as published by 6 | // the Free Software Foundation, either version 3 of the License, or 7 | // (at your option) any later version. 8 | // 9 | // This program is distributed in the hope that it will be useful, 10 | // but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | // GNU General Public License for more details. 13 | // 14 | // You should have received a copy of the GNU General Public License 15 | // along with this program. If not, see . 16 | 17 | package remotesync 18 | 19 | import ( 20 | "bufio" 21 | "encoding/binary" 22 | "fmt" 23 | "github.com/indyjo/cafs" 24 | "github.com/indyjo/cafs/chunking" 25 | "io" 26 | "net/http" 27 | ) 28 | 29 | // Interface FlushWriter acts like an io.Writer with an additional Flush method. 30 | type FlushWriter interface { 31 | io.Writer 32 | Flush() 33 | } 34 | 35 | // Struct SimpleFlushWriter implements FlushWriter using a Writer and a Flusher. 36 | type SimpleFlushWriter struct { 37 | W io.Writer 38 | F http.Flusher 39 | } 40 | 41 | func (s SimpleFlushWriter) Write(p []byte) (n int, err error) { 42 | return s.W.Write(p) 43 | } 44 | 45 | func (s SimpleFlushWriter) Flush() { 46 | s.F.Flush() 47 | } 48 | 49 | // An implementation of FlushWriter whose Flush() function is a nop. 
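// It is suitable for writers that need no explicit flushing, such as the io.Pipe writers
// used in remotesync_test.go.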
50 | type NopFlushWriter struct { 51 | W io.Writer 52 | } 53 | 54 | func (f NopFlushWriter) Write(p []byte) (n int, err error) { 55 | return f.W.Write(p) 56 | } 57 | 58 | func (f NopFlushWriter) Flush() { 59 | } 60 | 61 | // The key pertaining to the SHA256 of an empty string is used to represent placeholders 62 | // for empty slots generated by shuffled transmissions. 63 | var emptyKey = *cafs.MustParseKey("e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855") 64 | 65 | // Type ChunkInfo contains a chunk's hash and size. 66 | type ChunkInfo struct { 67 | Key cafs.SKey 68 | Size int 69 | } 70 | 71 | var emptyChunkInfo = ChunkInfo{emptyKey, 0} 72 | 73 | func readChunkLength(r *bufio.Reader) (int64, error) { 74 | if l, err := binary.ReadVarint(r); err != nil { 75 | return 0, err 76 | } else if l < 0 || l > chunking.MaxChunkSize { 77 | return 0, fmt.Errorf("Illegal chunk length: %v", l) 78 | } else { 79 | return l, nil 80 | } 81 | } 82 | 83 | func writeVarint(w io.Writer, value int64) error { 84 | var buf [binary.MaxVarintLen64]byte 85 | _, err := w.Write(buf[:binary.PutVarint(buf[:], value)]) 86 | return err 87 | } 88 | 89 | type bitWriter struct { 90 | w FlushWriter 91 | n int 92 | buf [1]byte 93 | } 94 | 95 | func newBitWriter(writer FlushWriter) *bitWriter { 96 | return &bitWriter{w: writer} 97 | } 98 | 99 | func (w *bitWriter) WriteBit(b bool) (err error) { 100 | if b { 101 | w.buf[0] = (w.buf[0] << 1) | 1 102 | } else { 103 | w.buf[0] = w.buf[0] << 1 104 | } 105 | w.n++ 106 | if w.n == 8 { 107 | _, err = w.w.Write(w.buf[:]) 108 | if err == nil { 109 | w.w.Flush() 110 | } 111 | w.n = 0 112 | } 113 | return 114 | } 115 | 116 | func (w *bitWriter) Flush() (err error) { 117 | for err == nil && w.n != 0 { 118 | err = w.WriteBit(false) 119 | } 120 | return 121 | } 122 | 123 | type bitReader struct { 124 | r io.ByteReader 125 | n uint 126 | b byte 127 | } 128 | 129 | func newBitReader(r io.ByteReader) *bitReader { 130 | return &bitReader{r: r, n: 0, b: 0} 131 | } 132 | 133 | func (r *bitReader) ReadBit() (bit bool, err error) { 134 | if r.n == 8 { 135 | r.n = 0 136 | } 137 | if r.n == 0 { 138 | r.b, err = r.r.ReadByte() 139 | if err != nil { 140 | return 141 | } 142 | } 143 | n := r.n 144 | r.n++ 145 | bit = 0 != (0x80 & (r.b << n)) 146 | return 147 | } 148 | 149 | // Function readChunk reads a single chunk worth of data from stream `r` into a new 150 | // file on FileStorage `s`. 151 | // The expected encoding is (varint, data...). 152 | func readChunk(s cafs.FileStorage, r *bufio.Reader, info string) (cafs.File, error) { 153 | var length int64 154 | if n, err := readChunkLength(r); err != nil { 155 | return nil, err 156 | } else { 157 | length = n 158 | } 159 | tempChunk := s.Create(info) 160 | defer tempChunk.Dispose() 161 | if _, err := io.CopyN(tempChunk, r, length); err != nil { 162 | return nil, err 163 | } 164 | if err := tempChunk.Close(); err != nil { 165 | return nil, err 166 | } 167 | return tempChunk.File(), nil 168 | } 169 | --------------------------------------------------------------------------------
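The pieces above fit together as sketched below: the receiver publishes a wishlist of missing chunks, the sender answers with the requested chunk data in the agreed permuted order, and the receiver reconstructs the file. This sketch is not part of the repository; it is modeled closely on testWithParams in remotesync_test.go and reuses that test's arguments where their exact meaning is not documented in this excerpt (the storage sizes, the permutation length 100, and the third argument 8 to NewBuilder). It also assumes the storage package is imported under its directory name, ram.

package main

import (
	"bufio"
	"fmt"
	"io"
	"log"
	"math/rand"

	"github.com/indyjo/cafs/ram"
	"github.com/indyjo/cafs/remotesync"
)

func main() {
	// Two independent stores play the roles of sender (A) and receiver (B).
	storeA := ram.NewRamStorage(8 * 1024 * 1024)
	storeB := ram.NewRamStorage(8 * 1024 * 1024)

	// Store some data in A; this is the file to be transferred.
	tempA := storeA.Create("original data")
	defer tempA.Dispose()
	for i := 0; i < 1000; i++ {
		_, _ = tempA.Write([]byte(fmt.Sprintf("block %d: some payload\n", i)))
	}
	if err := tempA.Close(); err != nil {
		log.Fatal(err)
	}
	fileA := tempA.File()
	defer fileA.Dispose()

	// Step 1: both sides agree on chunk hashes and a shuffling permutation.
	syncinf := &remotesync.SyncInfo{}
	syncinf.SetPermutation(rand.Perm(100)) // permutation length chosen arbitrarily
	syncinf.SetChunksFromFile(fileA)

	// The value 8 mirrors remotesync_test.go; its exact semantics are not shown in this excerpt.
	builder := remotesync.NewBuilder(storeB, syncinf, 8, "reconstructed data")
	defer builder.Dispose()

	// Pipe 1 carries the wishlist from receiver to sender,
	// pipe 2 carries the requested chunk data from sender to receiver.
	wishR, wishW := io.Pipe()
	dataR, dataW := io.Pipe()

	// Step 2: the receiver streams its wishlist (one bit per chunk).
	go func() {
		if err := builder.WriteWishList(remotesync.NopFlushWriter{W: wishW}); err != nil {
			_ = wishW.CloseWithError(fmt.Errorf("error writing wishlist: %v", err))
			return
		}
		_ = wishW.Close()
	}()

	// Step 3: the sender answers with the content of the requested chunks.
	go func() {
		chunks := remotesync.ChunksOfFile(fileA)
		defer chunks.Dispose()
		err := remotesync.WriteChunkData(chunks, fileA.Size(), bufio.NewReader(wishR),
			syncinf.Perm, remotesync.NopFlushWriter{W: dataW}, nil)
		if err != nil {
			_ = dataW.CloseWithError(fmt.Errorf("error writing chunk data: %v", err))
			return
		}
		_ = dataW.Close()
	}()

	// The receiver reconstructs the file from the chunk data stream.
	fileB, err := builder.ReconstructFileFromRequestedChunks(dataR)
	if err != nil {
		log.Fatalf("error reconstructing: %v", err)
	}
	defer fileB.Dispose()

	log.Printf("transferred %d bytes, reconstructed key: %v", fileB.Size(), fileB.Key())
}

In a real deployment the two io.Pipe connections would be replaced by network streams (the remotesync/httpsync package wires them to HTTP), with the wishlist travelling from receiver to sender and the chunk data travelling back.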