├── .gitignore ├── LICENSE ├── README.md ├── hg-decode.pl ├── rip-bzr.pl ├── rip-cvs.pl ├── rip-git.pl ├── rip-hg.pl └── rip-svn.pl /.gitignore: -------------------------------------------------------------------------------- 1 | blib/ 2 | .build/ 3 | _build/ 4 | cover_db/ 5 | inc/ 6 | Build 7 | Build.bat 8 | .last_cover_stats 9 | Makefile 10 | Makefile.old 11 | MANIFEST.bak 12 | META.yml 13 | MYMETA.yml 14 | nytprof.out 15 | pm_to_blib 16 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | GNU GENERAL PUBLIC LICENSE 2 | Version 2, June 1991 3 | 4 | Copyright (C) 1989, 1991 Free Software Foundation, Inc., 5 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 6 | Everyone is permitted to copy and distribute verbatim copies 7 | of this license document, but changing it is not allowed. 8 | 9 | Preamble 10 | 11 | The licenses for most software are designed to take away your 12 | freedom to share and change it. By contrast, the GNU General Public 13 | License is intended to guarantee your freedom to share and change free 14 | software--to make sure the software is free for all its users. This 15 | General Public License applies to most of the Free Software 16 | Foundation's software and to any other program whose authors commit to 17 | using it. (Some other Free Software Foundation software is covered by 18 | the GNU Lesser General Public License instead.) You can apply it to 19 | your programs, too. 20 | 21 | When we speak of free software, we are referring to freedom, not 22 | price. Our General Public Licenses are designed to make sure that you 23 | have the freedom to distribute copies of free software (and charge for 24 | this service if you wish), that you receive source code or can get it 25 | if you want it, that you can change the software or use pieces of it 26 | in new free programs; and that you know you can do these things. 
27 | 28 | To protect your rights, we need to make restrictions that forbid 29 | anyone to deny you these rights or to ask you to surrender the rights. 30 | These restrictions translate to certain responsibilities for you if you 31 | distribute copies of the software, or if you modify it. 32 | 33 | For example, if you distribute copies of such a program, whether 34 | gratis or for a fee, you must give the recipients all the rights that 35 | you have. You must make sure that they, too, receive or can get the 36 | source code. And you must show them these terms so they know their 37 | rights. 38 | 39 | We protect your rights with two steps: (1) copyright the software, and 40 | (2) offer you this license which gives you legal permission to copy, 41 | distribute and/or modify the software. 42 | 43 | Also, for each author's protection and ours, we want to make certain 44 | that everyone understands that there is no warranty for this free 45 | software. If the software is modified by someone else and passed on, we 46 | want its recipients to know that what they have is not the original, so 47 | that any problems introduced by others will not reflect on the original 48 | authors' reputations. 49 | 50 | Finally, any free program is threatened constantly by software 51 | patents. We wish to avoid the danger that redistributors of a free 52 | program will individually obtain patent licenses, in effect making the 53 | program proprietary. To prevent this, we have made it clear that any 54 | patent must be licensed for everyone's free use or not licensed at all. 55 | 56 | The precise terms and conditions for copying, distribution and 57 | modification follow. 58 | 59 | GNU GENERAL PUBLIC LICENSE 60 | TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION 61 | 62 | 0. This License applies to any program or other work which contains 63 | a notice placed by the copyright holder saying it may be distributed 64 | under the terms of this General Public License. 
The "Program", below, 65 | refers to any such program or work, and a "work based on the Program" 66 | means either the Program or any derivative work under copyright law: 67 | that is to say, a work containing the Program or a portion of it, 68 | either verbatim or with modifications and/or translated into another 69 | language. (Hereinafter, translation is included without limitation in 70 | the term "modification".) Each licensee is addressed as "you". 71 | 72 | Activities other than copying, distribution and modification are not 73 | covered by this License; they are outside its scope. The act of 74 | running the Program is not restricted, and the output from the Program 75 | is covered only if its contents constitute a work based on the 76 | Program (independent of having been made by running the Program). 77 | Whether that is true depends on what the Program does. 78 | 79 | 1. You may copy and distribute verbatim copies of the Program's 80 | source code as you receive it, in any medium, provided that you 81 | conspicuously and appropriately publish on each copy an appropriate 82 | copyright notice and disclaimer of warranty; keep intact all the 83 | notices that refer to this License and to the absence of any warranty; 84 | and give any other recipients of the Program a copy of this License 85 | along with the Program. 86 | 87 | You may charge a fee for the physical act of transferring a copy, and 88 | you may at your option offer warranty protection in exchange for a fee. 89 | 90 | 2. You may modify your copy or copies of the Program or any portion 91 | of it, thus forming a work based on the Program, and copy and 92 | distribute such modifications or work under the terms of Section 1 93 | above, provided that you also meet all of these conditions: 94 | 95 | a) You must cause the modified files to carry prominent notices 96 | stating that you changed the files and the date of any change. 
97 | 98 | b) You must cause any work that you distribute or publish, that in 99 | whole or in part contains or is derived from the Program or any 100 | part thereof, to be licensed as a whole at no charge to all third 101 | parties under the terms of this License. 102 | 103 | c) If the modified program normally reads commands interactively 104 | when run, you must cause it, when started running for such 105 | interactive use in the most ordinary way, to print or display an 106 | announcement including an appropriate copyright notice and a 107 | notice that there is no warranty (or else, saying that you provide 108 | a warranty) and that users may redistribute the program under 109 | these conditions, and telling the user how to view a copy of this 110 | License. (Exception: if the Program itself is interactive but 111 | does not normally print such an announcement, your work based on 112 | the Program is not required to print an announcement.) 113 | 114 | These requirements apply to the modified work as a whole. If 115 | identifiable sections of that work are not derived from the Program, 116 | and can be reasonably considered independent and separate works in 117 | themselves, then this License, and its terms, do not apply to those 118 | sections when you distribute them as separate works. But when you 119 | distribute the same sections as part of a whole which is a work based 120 | on the Program, the distribution of the whole must be on the terms of 121 | this License, whose permissions for other licensees extend to the 122 | entire whole, and thus to each and every part regardless of who wrote it. 123 | 124 | Thus, it is not the intent of this section to claim rights or contest 125 | your rights to work written entirely by you; rather, the intent is to 126 | exercise the right to control the distribution of derivative or 127 | collective works based on the Program. 
128 | 129 | In addition, mere aggregation of another work not based on the Program 130 | with the Program (or with a work based on the Program) on a volume of 131 | a storage or distribution medium does not bring the other work under 132 | the scope of this License. 133 | 134 | 3. You may copy and distribute the Program (or a work based on it, 135 | under Section 2) in object code or executable form under the terms of 136 | Sections 1 and 2 above provided that you also do one of the following: 137 | 138 | a) Accompany it with the complete corresponding machine-readable 139 | source code, which must be distributed under the terms of Sections 140 | 1 and 2 above on a medium customarily used for software interchange; or, 141 | 142 | b) Accompany it with a written offer, valid for at least three 143 | years, to give any third party, for a charge no more than your 144 | cost of physically performing source distribution, a complete 145 | machine-readable copy of the corresponding source code, to be 146 | distributed under the terms of Sections 1 and 2 above on a medium 147 | customarily used for software interchange; or, 148 | 149 | c) Accompany it with the information you received as to the offer 150 | to distribute corresponding source code. (This alternative is 151 | allowed only for noncommercial distribution and only if you 152 | received the program in object code or executable form with such 153 | an offer, in accord with Subsection b above.) 154 | 155 | The source code for a work means the preferred form of the work for 156 | making modifications to it. For an executable work, complete source 157 | code means all the source code for all modules it contains, plus any 158 | associated interface definition files, plus the scripts used to 159 | control compilation and installation of the executable. 
However, as a 160 | special exception, the source code distributed need not include 161 | anything that is normally distributed (in either source or binary 162 | form) with the major components (compiler, kernel, and so on) of the 163 | operating system on which the executable runs, unless that component 164 | itself accompanies the executable. 165 | 166 | If distribution of executable or object code is made by offering 167 | access to copy from a designated place, then offering equivalent 168 | access to copy the source code from the same place counts as 169 | distribution of the source code, even though third parties are not 170 | compelled to copy the source along with the object code. 171 | 172 | 4. You may not copy, modify, sublicense, or distribute the Program 173 | except as expressly provided under this License. Any attempt 174 | otherwise to copy, modify, sublicense or distribute the Program is 175 | void, and will automatically terminate your rights under this License. 176 | However, parties who have received copies, or rights, from you under 177 | this License will not have their licenses terminated so long as such 178 | parties remain in full compliance. 179 | 180 | 5. You are not required to accept this License, since you have not 181 | signed it. However, nothing else grants you permission to modify or 182 | distribute the Program or its derivative works. These actions are 183 | prohibited by law if you do not accept this License. Therefore, by 184 | modifying or distributing the Program (or any work based on the 185 | Program), you indicate your acceptance of this License to do so, and 186 | all its terms and conditions for copying, distributing or modifying 187 | the Program or works based on it. 188 | 189 | 6. 
Each time you redistribute the Program (or any work based on the 190 | Program), the recipient automatically receives a license from the 191 | original licensor to copy, distribute or modify the Program subject to 192 | these terms and conditions. You may not impose any further 193 | restrictions on the recipients' exercise of the rights granted herein. 194 | You are not responsible for enforcing compliance by third parties to 195 | this License. 196 | 197 | 7. If, as a consequence of a court judgment or allegation of patent 198 | infringement or for any other reason (not limited to patent issues), 199 | conditions are imposed on you (whether by court order, agreement or 200 | otherwise) that contradict the conditions of this License, they do not 201 | excuse you from the conditions of this License. If you cannot 202 | distribute so as to satisfy simultaneously your obligations under this 203 | License and any other pertinent obligations, then as a consequence you 204 | may not distribute the Program at all. For example, if a patent 205 | license would not permit royalty-free redistribution of the Program by 206 | all those who receive copies directly or indirectly through you, then 207 | the only way you could satisfy both it and this License would be to 208 | refrain entirely from distribution of the Program. 209 | 210 | If any portion of this section is held invalid or unenforceable under 211 | any particular circumstance, the balance of the section is intended to 212 | apply and the section as a whole is intended to apply in other 213 | circumstances. 214 | 215 | It is not the purpose of this section to induce you to infringe any 216 | patents or other property right claims or to contest validity of any 217 | such claims; this section has the sole purpose of protecting the 218 | integrity of the free software distribution system, which is 219 | implemented by public license practices. 
Many people have made 220 | generous contributions to the wide range of software distributed 221 | through that system in reliance on consistent application of that 222 | system; it is up to the author/donor to decide if he or she is willing 223 | to distribute software through any other system and a licensee cannot 224 | impose that choice. 225 | 226 | This section is intended to make thoroughly clear what is believed to 227 | be a consequence of the rest of this License. 228 | 229 | 8. If the distribution and/or use of the Program is restricted in 230 | certain countries either by patents or by copyrighted interfaces, the 231 | original copyright holder who places the Program under this License 232 | may add an explicit geographical distribution limitation excluding 233 | those countries, so that distribution is permitted only in or among 234 | countries not thus excluded. In such case, this License incorporates 235 | the limitation as if written in the body of this License. 236 | 237 | 9. The Free Software Foundation may publish revised and/or new versions 238 | of the General Public License from time to time. Such new versions will 239 | be similar in spirit to the present version, but may differ in detail to 240 | address new problems or concerns. 241 | 242 | Each version is given a distinguishing version number. If the Program 243 | specifies a version number of this License which applies to it and "any 244 | later version", you have the option of following the terms and conditions 245 | either of that version or of any later version published by the Free 246 | Software Foundation. If the Program does not specify a version number of 247 | this License, you may choose any version ever published by the Free Software 248 | Foundation. 249 | 250 | 10. If you wish to incorporate parts of the Program into other free 251 | programs whose distribution conditions are different, write to the author 252 | to ask for permission. 
For software which is copyrighted by the Free 253 | Software Foundation, write to the Free Software Foundation; we sometimes 254 | make exceptions for this. Our decision will be guided by the two goals 255 | of preserving the free status of all derivatives of our free software and 256 | of promoting the sharing and reuse of software generally. 257 | 258 | NO WARRANTY 259 | 260 | 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY 261 | FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN 262 | OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES 263 | PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED 264 | OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 265 | MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS 266 | TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE 267 | PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, 268 | REPAIR OR CORRECTION. 269 | 270 | 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING 271 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR 272 | REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, 273 | INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING 274 | OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED 275 | TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY 276 | YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER 277 | PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE 278 | POSSIBILITY OF SUCH DAMAGES. 
279 | 280 | END OF TERMS AND CONDITIONS 281 | 282 | How to Apply These Terms to Your New Programs 283 | 284 | If you develop a new program, and you want it to be of the greatest 285 | possible use to the public, the best way to achieve this is to make it 286 | free software which everyone can redistribute and change under these terms. 287 | 288 | To do so, attach the following notices to the program. It is safest 289 | to attach them to the start of each source file to most effectively 290 | convey the exclusion of warranty; and each file should have at least 291 | the "copyright" line and a pointer to where the full notice is found. 292 | 293 | {description} 294 | Copyright (C) {year} {fullname} 295 | 296 | This program is free software; you can redistribute it and/or modify 297 | it under the terms of the GNU General Public License as published by 298 | the Free Software Foundation; either version 2 of the License, or 299 | (at your option) any later version. 300 | 301 | This program is distributed in the hope that it will be useful, 302 | but WITHOUT ANY WARRANTY; without even the implied warranty of 303 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 304 | GNU General Public License for more details. 305 | 306 | You should have received a copy of the GNU General Public License along 307 | with this program; if not, write to the Free Software Foundation, Inc., 308 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 309 | 310 | Also add information on how to contact you by electronic and paper mail. 311 | 312 | If the program is interactive, make it output a short notice like this 313 | when it starts in an interactive mode: 314 | 315 | Gnomovision version 69, Copyright (C) year name of author 316 | Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. 317 | This is free software, and you are welcome to redistribute it 318 | under certain conditions; type `show c' for details. 
319 | 320 | The hypothetical commands `show w' and `show c' should show the appropriate 321 | parts of the General Public License. Of course, the commands you use may 322 | be called something other than `show w' and `show c'; they could even be 323 | mouse-clicks or menu items--whatever suits your program. 324 | 325 | You should also get your employer (if you work as a programmer) or your 326 | school, if any, to sign a "copyright disclaimer" for the program, if 327 | necessary. Here is a sample; alter the names: 328 | 329 | Yoyodyne, Inc., hereby disclaims all copyright interest in the program 330 | `Gnomovision' (which makes passes at compilers) written by James Hacker. 331 | 332 | {signature of Ty Coon}, 1 April 1989 333 | Ty Coon, President of Vice 334 | 335 | This General Public License does not permit incorporating your program into 336 | proprietary programs. If your program is a subroutine library, you may 337 | consider it more useful to permit linking proprietary applications with the 338 | library. If this is what you want to do, use the GNU Lesser General 339 | Public License instead of this License. 340 | 341 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![Rawsec's CyberSecurity Inventory](https://inventory.rawsec.ml/img/badges/Rawsec-inventoried-FF5050_flat.svg)](https://inventory.rawsec.ml/tools.html#dvcs-ripper) 2 | [![GitHub stars](https://img.shields.io/github/stars/kost/dvcs-ripper.svg)](https://github.com/kost/dvcs-ripper/stargazers) 3 | [![GitHub license](https://img.shields.io/github/license/kost/dvcs-ripper.svg)](https://github.com/kost/dvcs-ripper/blob/master/LICENSE) 4 | 5 | dvcs-ripper 6 | =========== 7 | 8 | Rip web accessible (distributed) version control systems: SVN, GIT, Mercurial/hg, bzr, ... 9 | 10 | It can rip repositories even when directory browsing is turned off. 
11 | 12 | Make sure to position yourself in an empty directory where you want the repositories to be downloaded/cloned. 13 | 14 | ## Requirements 15 | 16 | - Perl 17 | - Perl modules: 18 | - required: LWP, IO::Socket::SSL 19 | - for newer SVN: DBD::SQLite and DBI 20 | - for faster GIT: Parallel::ForkManager, Redis and Algorithm::Combinatorics 21 | - (D)VCS client of what you want to rip (cvs, svn, git, hg, bzr, ...) 22 | 23 | ### Requirements on Debian/Ubuntu 24 | 25 | You can easily install the Perl requirements: 26 | 27 | `sudo apt-get install perl libio-socket-ssl-perl libdbd-sqlite3-perl libclass-dbi-perl libio-all-lwp-perl` 28 | 29 | Optional requirements (faster git rip): 30 | `sudo apt-get install libparallel-forkmanager-perl libredis-perl libalgorithm-combinatorics-perl` 31 | 32 | And if you need all clients supported: 33 | 34 | `sudo apt-get install cvs subversion git bzr mercurial` 35 | 36 | ## Docker 37 | 38 | In case you just want the Docker version, it is here: 39 | 40 | https://github.com/kost/docker-webscan/tree/master/alpine-dvcs-ripper 41 | 42 | Just say something like: 43 | 44 | `docker run --rm -it -v /path/to/host/work:/work:rw k0st/alpine-dvcs-ripper rip-git.pl -v -u http://www.example.org/.git` 45 | 46 | 47 | GIT 48 | =========== 49 | Example run (for git): 50 | 51 | `rip-git.pl -v -u http://www.example.com/.git/` 52 | 53 | It will automatically do `git checkout -f` 54 | 55 | or if you would like to ignore SSL certificate verification (with -s): 56 | 57 | `rip-git.pl -s -v -u http://www.example.com/.git/` 58 | 59 | Mercurial/HG 60 | =========== 61 | Example run (for hg): 62 | 63 | `rip-hg.pl -v -u http://www.example.com/.hg/` 64 | 65 | It will automatically do `hg revert ` 66 | 67 | or if you would like to ignore SSL certificate verification (with -s): 68 | 69 | `rip-hg.pl -s -v -u http://www.example.com/.hg/` 70 | 71 | Bazaar/bzr 72 | =========== 73 | Example run (for bzr): 74 | 75 | `rip-bzr.pl -v -u http://www.example.com/.bzr/` 76 | 77 | It will 
automatically do `bzr revert` 78 | 79 | or if you would like to ignore SSL certificate verification (with -s): 80 | 81 | `rip-bzr.pl -s -v -u http://www.example.com/.bzr/` 82 | 83 | 84 | SVN 85 | =========== 86 | It supports both the OLDER and the NEWER svn client formats. The older format has .svn files in every directory, while 87 | the newer format has a single .svn directory with wc.db inside that .svn directory. It will automatically detect which 88 | format is used on the target. 89 | 90 | Example run (for SVN): 91 | 92 | `rip-svn.pl -v -u http://www.example.com/.svn/` 93 | 94 | It will automatically do `svn revert -R .` 95 | 96 | CVS 97 | =========== 98 | Example run (for CVS): 99 | 100 | `rip-cvs.pl -v -u http://www.example.com/CVS/` 101 | 102 | This will not rip CVS, but it will display useful info. 103 | 104 | ## Advanced usage examples 105 | 106 | Some examples of how it can be used: 107 | 108 | ### Output handling 109 | 110 | Download git tree to a specific output dir: 111 | 112 | `rip-git.pl -o /my/previously/made/dir -v -u http://www.example.com/.git/` 113 | 114 | Download git tree to a specific output dir (creating dir `http__www.example.com_.git_` for url): 115 | 116 | `rip-git.pl -m -o /dir -v -u http://www.example.com/.git/` 117 | 118 | ### Redis usage with docker 119 | 120 | Create a Redis docker container: 121 | 122 | `docker run --rm --name myredis -it -v /my/host/dir/data:/data:rw k0st/alpine-redis` 123 | 124 | In another terminal, just link the Redis container and say something like this: 125 | 126 | `docker run --rm --link=myredis:redis -it -v /path/to/host/work:/work:rw k0st/alpine-dvcs-ripper rip-git.pl -e docker -v -u http://www.example.org/.git -m -o /work` 127 | 128 | ### Using redis for resuming work of ripping 129 | 130 | Create a Redis docker container: 131 | 132 | `docker run --name redisdvcs -it -v /my/host/dir/data:/data:rw k0st/alpine-redis` 133 | 134 | In another terminal, just link the Redis container and say something like this: 135 | 136 | `docker run 
--link=redisdvcs:redis -it -v /path/to/host/work:/work:rw k0st/alpine-dvcs-ripper rip-git.pl -n -e docker -v -u http://www.example.org/.git -m -o /work` 137 | 138 | ### Abusing redis for massive parallel tasks 139 | 140 | Create a global NFS share and mount it as /work on each client. Create a global Redis docker container: 141 | 142 | `docker run --name redisdvcs -it -v /my/host/dir/data:/data:rw k0st/alpine-redis` 143 | 144 | In another terminal, just link the Redis container and say something like this on the 1st client: 145 | 146 | `docker run -it -v /path/to/host/work:/work:rw k0st/alpine-dvcs-ripper rip-git.pl -n -e global.docker.ip -v -u http://www.example.org/.git -t 10 -c -m -o /work` 147 | 148 | In another terminal, just link the Redis container and say something like this on the 2nd client: 149 | 150 | `docker run -it -v /path/to/host/work:/work:rw k0st/alpine-dvcs-ripper rip-git.pl -n -e global.docker.ip -v -u http://www.example.org/.git -t 10 -c -m -o /work` 151 | 152 | and so on... 153 | 154 | You need to perform `git checkout -f` yourself at the end - of course! 155 | 156 | ## Future 157 | 158 | Feel free to implement something and send a pull request. Feel free to suggest any feature. 
Lot of features 159 | actually were implemented by request 160 | 161 | ### ToDo 162 | - [ ] Recognize 404 pages which return 200 in SVN/CVS 163 | - [ ] Try to repeat each trick after previous trick was successful 164 | - [ ] Progress bars 165 | 166 | ### Done 167 | - [x] Support for brute forcing pack names 168 | - [x] Intelligent guessing of packed refs 169 | - [x] Support for objects/info/packs from https://www.kernel.org/pub/software/scm/git/docs/gitrepository-layout.html 170 | - [x] Recognize 404 pages which return 200 171 | - [x] Introduce ignore SSL/TLS verification in SVN/CVS 172 | - [x] Bzr support 173 | 174 | -------------------------------------------------------------------------------- /hg-decode.pl: --------------------------------------------------------------------------------
#!/usr/bin/perl
#
# hg-decode.pl - pull tracked files out of a web-exposed Mercurial directory.
#
# Usage: hg-decode.pl <server>
#
# <server> is inserted verbatim into "http://<server>/.hg/...".  The script
# downloads .hg/dirstate, reads the list of tracked file names from it and
# then, for every name, downloads the matching revlog from .hg/store/data/
# (<name>.i, plus <name>.d when the server answers 200 for it) and writes the
# decoded revision chunks to output/<name>.

use strict;
use warnings;

use IO::Uncompress::Inflate qw(inflate $InflateError);
use File::Path qw(make_path);
use LWP::UserAgent;
use File::Temp qw(tempfile tempdir);

my $server = $ARGV[0];
die "Usage: $0 <server>\n" unless defined $server && length $server;

my $ua = LWP::UserAgent->new;
$ua->agent("All Your Files Are Belong To Us/1.0");

# First grab the database file
my $dirstate = http_get($ua, "http://$server/.hg/dirstate");
die "Could not find Mercurial database" unless defined $dirstate;

# Now extract the files
foreach my $entry (read_dirstate($dirstate)) {
    extract_file($ua, $server, $entry->{'name'});
}

exit 0;

# http_get($ua, $url)
# Issue a GET request and return the response body, or nothing (undef in
# scalar context) when the status code is anything other than 200.
sub http_get {
    my ($agent, $url) = @_;

    my $response = $agent->request(HTTP::Request->new(GET => $url));
    return unless $response->code == 200;
    return $response->content;
}

# spool_to_tempfile($bytes)
# Write $bytes to a fresh temporary file and reopen it for binary reading.
# Returns ($read_handle, $path); the caller closes the handle and unlinks
# the path.  Dies when the temporary file cannot be written or reopened.
sub spool_to_tempfile {
    my ($bytes) = @_;

    my ($writer, $path) = tempfile();
    binmode($writer);
    print {$writer} $bytes or die "Cannot write $path: $!";
    close($writer) or die "Cannot close $path: $!";

    open(my $reader, "<", $path) or die "Cannot reopen $path: $!";
    binmode($reader);
    return ($reader, $path);
}

# read_dirstate($bytes)
# Parse the downloaded dirstate and return a list of hash references with
# the keys status, mode, size, mtime, length and name.  Each record is a
# 17-byte big-endian header ("CNNNN") followed by `length` bytes of name.
sub read_dirstate {
    my ($bytes) = @_;

    my ($fh, $path) = spool_to_tempfile($bytes);
    my @entries;

    # The file starts with two 20-byte fields (the original code called them
    # p1 and p2 and never used them); consume them so the records line up.
    read($fh, my $parents, 40);

    # Testing eof *before* reading means a dirstate without records yields an
    # empty list instead of one entry built from undefined values.
    while (!eof($fh)) {
        my $got = read($fh, my $header, 17);
        last unless defined $got && $got == 17;    # truncated record

        my %entry;
        @entry{qw(status mode size mtime length)} = unpack("CNNNN", $header);

        $got = read($fh, my $name, $entry{'length'});
        last unless defined $got && $got == $entry{'length'};

        # For copied/renamed files the name field holds "name\0source";
        # only the part before the NUL is the tracked file name.
        $name =~ s/\0.*\z//s;
        $entry{'name'} = $name;

        push @entries, \%entry;
    }

    close($fh);
    unlink($path);
    return @entries;
}

# hg_store_name($name)
# Translate a tracked file name into its path below the server root:
# ".hg/store/data/" is prepended, "_" is doubled, upper-case ASCII letters
# become "_" plus the lower-case letter, and every byte outside " ".."~"
# becomes "~" plus two hex digits.
# NOTE(review): Mercurial's store encoding presumably has further rules
# (reserved characters and names, very long paths) that are not handled
# here - confirm against the Mercurial documentation before relying on it.
sub hg_store_name {
    my ($name) = @_;

    my $path = ".hg/store/data/" . $name;
    $path =~ s/_/__/g;
    $path =~ s/([A-Z])/_\l$1/g;
    $path =~ s/([^ -~])/'~' . unpack('H2', $1)/ge;
    return $path;
}

# decode_chunk($chunk)
# Turn one stored revlog chunk into its payload.  A chunk starting with a
# NUL byte is stored as-is, a chunk starting with "u" is uncompressed data
# behind a one-byte marker, anything else is handed to inflate().
# Returns nothing when inflate() reports an error.
sub decode_chunk {
    my ($chunk) = @_;

    my $marker = substr($chunk, 0, 1);
    return $chunk            if $marker eq "\0";
    return substr($chunk, 1) if $marker eq 'u';

    my $plain;
    unless (inflate(\$chunk => \$plain)) {
        warn "inflate failed: $InflateError\n";
        return;
    }
    return $plain;
}

# write_revisions($index_fh, $data_fh, $out, $name)
# Walk the 64-byte records of the revlog index and append every decoded
# chunk to $out.  Chunk bytes are read from $data_fh when it is defined
# (separate .d file) and otherwise from $index_fh, directly after the record.
# $name is only used in diagnostics.
# NOTE(review): every revision chunk is appended unchanged, exactly like the
# original script did.  Revlogs presumably store later revisions as deltas,
# so files with more than one revision may not come out as plain content -
# confirm before trusting multi-revision output.
sub write_revisions {
    my ($index_fh, $data_fh, $out, $name) = @_;

    while (!eof($index_fh)) {
        my $got = read($index_fh, my $record, 64);
        last unless defined $got && $got == 64;    # truncated record

        # Big-endian record: 48-bit offset (split 8/8/32), 16-bit flags,
        # six 32-bit integers, and the remaining 32 bytes as hex.
        my ($msb, $nmsb, $low, %head);
        ( $msb, $nmsb, $low,
          @head{qw(flags clength ulength base link p1 p2 nodeid)} )
            = unpack("CCNnNNNNNNH*", $record);
        $head{'offset'} = $low + ($nmsb << 32) + ($msb << 40);

        next unless $head{'clength'} > 0;

        my $source = defined $data_fh ? $data_fh : $index_fh;
        $got = read($source, my $chunk, $head{'clength'});
        unless (defined $got && $got == $head{'clength'}) {
            warn "Truncated revlog data for $name\n";
            last;
        }

        my $decoded = decode_chunk($chunk);
        print {$out} $decoded if defined $decoded;
    }
    return;
}

# extract_file($ua, $server, $name)
# Download the revlog for one tracked file and write its decoded chunks to
# output/<name>.  Problems with a single file are reported with warn() and
# the file is skipped, so one bad entry does not abort the whole run.
sub extract_file {
    my ($agent, $host, $name) = @_;

    # $name comes from a remote server and is used as a local path: refuse
    # empty names, absolute paths and any ".." component so nothing can be
    # written outside output/.
    if ($name eq '' || $name =~ m{\A/} || $name =~ m{(?:\A|/)\.\.(?:/|\z)}) {
        warn "Skipping suspicious dirstate entry: $name\n";
        return;
    }

    print "Extracting " . $name . "\n";

    my $store_name = hg_store_name($name);

    my $index = http_get($agent, "http://$host/$store_name" . ".i");
    if (!defined $index) {
        warn "Could not download revlog index for $name, skipping\n";
        return;
    }
    my ($index_fh, $index_path) = spool_to_tempfile($index);

    # The separate data file is optional; without it the chunks are read
    # from the index file itself.
    my ($data_fh, $data_path);
    my $data = http_get($agent, "http://$host/$store_name" . ".d");
    ($data_fh, $data_path) = spool_to_tempfile($data) if defined $data;

    # Make sure the path is there for the output
    my $output_file = "output/" . $name;
    (my $output_dir = $output_file) =~ s{/[^/]*\z}{};
    make_path($output_dir);

    if (open(my $out, ">", $output_file)) {
        binmode($out);
        write_revisions($index_fh, $data_fh, $out, $name);
        close($out) or warn "Cannot finish writing $output_file: $!\n";
    }
    else {
        warn "Cannot write $output_file: $!\n";
    }

    # Both temporary files are removed here; the path of the data file is
    # kept in this scope precisely so that it can be unlinked.
    close($index_fh);
    unlink($index_path);
    if (defined $data_fh) {
        close($data_fh);
        unlink($data_path);
    }
    return;
}
-------------------------------------------------------------------------------- /rip-bzr.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl 2 | 3 | use strict; 4 | 5 | use IO::Socket::SSL; 6 | use LWP; 7 | use LWP::UserAgent; 8 | use HTTP::Request; 9 | use HTTP::Response; 10 | use Getopt::Long; 11 | use Cwd; 12 | 13 | my $configfile="$ENV{HOME}/.rip-bzr"; 14 | my %config; 15 | $config{'bzrdir'} = ".bzr"; 16 | $config{'agent'} = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.7; rv:10.0.2) Gecko/20100101 Firefox/10.0.2'; 17 | $config{'verbose'}=0; 18 | $config{'checkout'}=1; 19 | 20 | $config{'respdetectmax'}=3; 21 | $config{'resp404size'}=256; 22 | $config{'resp404reqsize'}=32; 23 | 24 | sub randomstr { 25 | my($num) = @_; 26 | my 
@chars = ("A".."Z", "a".."z");
    my $string;
    $string .= $chars[rand @chars] for 1..$num;
    return $string;
}

# Read "key = value" pairs from the optional config file.
if (-e $configfile) {
    # Best effort: an unreadable config file is skipped. (`next` is not valid
    # here because this is not inside a loop.)
    if (open(my $cfgfh, '<', $configfile)) {
        while (<$cfgfh>) {
            chomp;                  # no newline
            s/#.*//;                # no comments
            s/^\s+//;               # no leading white
            s/\s+$//;               # no trailing white
            next unless length;     # anything left?
            my ($var, $value) = split(/\s*=\s*/, $_, 2);
            $config{$var} = $value;
        }
        close($cfgfh);
    }
}

Getopt::Long::Configure ("bundling");

my $result = GetOptions (
    "a|agent=s" => \$config{'agent'},
    "b|branch=s" => \$config{'branch'},
    "u|url=s" => \$config{'url'},
    "p|proxy=s" => \$config{'proxy'},
    "c|checkout!" => \$config{'checkout'},
    "s|sslignore!" => \$config{'sslignore'},
    "v|verbose+" => \$config{'verbose'},
    "h|help" => \&help
);

# Files with fixed names inside a .bzr directory.
my @knownfiles=(
    'branch-format',
    'branch/branch.conf',
    'branch/format',
    'branch/last-revision',
    'branch/tags',
    'checkout/conflicts',
    'checkout/dirstate',
    'checkout/format',
    'checkout/merge-hashes',
    'checkout/views',
    'repository/format',
    'repository/pack-names'
);

my $ua = LWP::UserAgent->new;

$ua->agent($config{'agent'});

if ($config{'sslignore'}) {
    $ua->ssl_opts(SSL_verify_mode => IO::Socket::SSL::SSL_VERIFY_NONE, verify_hostname => 0);
}
if ($config{'proxy'}) {
    # for socks proxy make sure you have LWP::Protocol::socks
    $ua->proxy(['http', 'https'], $config{'proxy'});
}

my $ddir=$config{'bzrdir'}."/";

mkdir $ddir;
mkdir $ddir."branch";
mkdir $ddir."checkout";
mkdir $ddir."repository";
mkdir $ddir."repository/indices";
mkdir $ddir."repository/packs";

print STDERR "[i] Downloading bzr files from $config{'url'}\n" if ($config{'verbose'}>0);

# Probe with random file names: a server that answers 200 for them serves
# "soft" 404 pages, which must then be recognised by their content.
my @resp404;
my $respdetectmax=$config{'respdetectmax'};
print STDERR "[i] Auto-detecting 404 as 200 with $config{'respdetectmax'} requests\n" if ($config{'verbose'}>0);
$config{'resp404correct'}=0;
for (my $i=0; $i<$respdetectmax;$i++) {
    my $resp=getreq(randomstr($config{'resp404reqsize'}));
    if ($resp->is_success) {
        push @resp404, $resp;
    } else {
        $config{'resp404correct'}=1;
        last; # exit loop
    }
}

if ($config{'resp404correct'}) {
    print STDERR "[i] Getting correct 404 responses\n";
} else {
    print STDERR "[i] Getting 200 as 404 responses. Adapting...\n";
    # The first probe's leading bytes become the soft-404 fingerprint; the
    # other probes are only compared against it to warn about variation.
    # (@resp404 is empty when respdetectmax is 0.)
    my $fingerprint = @resp404
        ? substr($resp404[0]->content,0,$config{'resp404size'})
        : '';
    foreach my $entry (@resp404) {
        my $chopresp=substr($entry->content,0,$config{'resp404size'});
        if ($fingerprint ne $chopresp) {
            print STDERR "[i] 404 responses are different, you will have to customize script source code\n";
            last; # exit loop
        }
    }
    $config{'resp404content'}=$fingerprint;
}

foreach my $file (@knownfiles) {
    getfile($file,$ddir.$file);
}


# Repeatedly ask "bzr check" what is missing and fetch it, until a pass
# reports nothing missing or fetches nothing new.
my $tofetch=0;
my $fetched=0;
my $pcount=1;
my $fcount=0;
while ($pcount>0) {
    print STDERR "[i] Running bzr check to check for missing items\n" if ($config{'verbose'}>0);
    open(my $pipe, "bzr check 2>&1 |") or die "cannot find bzr: $!";
    $pcount=0;
    $fcount=0;
    while (my $line = <$pipe>) {
        print $line if ($config{'verbose'}>9);
        chomp $line;
        next unless $line =~ /ERROR:/;
        $tofetch++;
        $pcount++;

        # The missing path is the first quoted string of the error line.
        my ($missingfile) = $line =~ m/'(.*?)'/;
        next unless defined $missingfile;

        # Strip the absolute prefix of the local .bzr directory, if present.
        my $curdir = getcwd() . "/" . $config{'bzrdir'} . "/";
        my $pos = index($missingfile, $curdir);
        substr($missingfile, $pos, length $curdir, '') if $pos >= 0;

        # Never follow ".." out of the download directory.
        next if $missingfile =~ m{(?:^|/)\.\.(?:/|$)};

        print STDERR "[i] Getting $missingfile\n" if ($config{'verbose'}>0);
        my $res=getfile($missingfile,$ddir.$missingfile);
        if ($res->is_success) {
            $fcount++;
            $fetched++;
        }
    }
    close($pipe);
    print STDERR "[i] Got items with bzr check: $pcount\n" if ($config{'verbose'}>0);
    print STDERR "[i] Items fetched: $fcount\n" if ($config{'verbose'}>0);
    if ($fcount == 0) {
        last;
    }
}

print STDERR "[i] Finished fetching ($fetched/$tofetch)\n";

if ($config{'checkout'}) {
    print STDERR "[i] Checking out/Reverting source by calling bzr revert\n";
    system("bzr revert");
}

# -- END

# GET $config{'url'}/$file and return the HTTP::Response, without saving it.
sub getreq {
    my ($file) = @_;
    my $furl = $config{'url'}."/".$file;
    my $req = HTTP::Request->new(GET => $furl);
    # Pass request to the user agent and get a response back
    my $res = $ua->request($req);
    return $res;
}

# GET $config{'url'}/$file and save the body to $outfile.
# Returns the HTTP::Response; a 200 whose body matches the soft-404
# fingerprint is reported as a synthetic 404 so callers do not count it as
# fetched.
sub getfile {
    my ($file,$outfile) = @_;
    my $furl = $config{'url'}."/".$file;
    my $req = HTTP::Request->new(GET => $furl);
    # Pass request to the user agent and get a response back
    my $res = $ua->request($req);
    if ($res->is_success) {
        if (not $config{'resp404correct'}) {
            print STDERR "[d] got 200 for $file, but checking content\n" if ($config{'verbose'}>1);
            my $chopresp=substr($res->content,0,$config{'resp404size'});
            if ($chopresp eq $config{'resp404content'}) {
                print STDERR "[!] Not found for $file: 404 as 200\n"
                    if ($config{'verbose'}>0);
                return HTTP::Response->new(404);
            }
        }
        print STDERR "[d] found $file\n" if ($config{'verbose'}>0);
        open (my $out, '>', $outfile) or die ("cannot open file $outfile: $!");
        binmode($out);
        print $out $res->content;
        close ($out);
    } else {
        print STDERR "[!] Not found for $file: ".$res->status_line."\n"
            if ($config{'verbose'}>0);
    }
    return $res;
}

# Print usage and exit.
sub help {
    print "DVCS-Ripper: rip-bzr.pl. Copyright (C) Kost.
Distributed under GPL.\n\n";
    print "Usage: $0 [options] -u [bzrurl] \n";
    print "\n";
    print " -c perform 'bzr revert' on end (default)\n";
    print " -b Use branch (default: $config{'branch'})\n";
    print " -a Use agent (default: $config{'agent'})\n";
    print " -s do not verify SSL cert\n";
    print " -p use proxy for connections\n";
    print " -v verbose (-vv will be more verbose)\n";
    print "\n";
    print "Example: $0 -v -u http://www.example.com/.bzr/\n";
    print "Example: $0 # with url and options in $configfile\n";
    print "Example: $0 -v -u -p socks://localhost:1080 http://www.example.com/.bzr/\n";
    print "For socks like proxy, make sure you have LWP::Protocol::socks\n";

    exit 0;
}

-------------------------------------------------------------------------------- /rip-cvs.pl: --------------------------------------------------------------------------------
#!/usr/bin/perl

use strict;

use IO::Socket::SSL;
use LWP;
use LWP::UserAgent;
use HTTP::Request;
use Getopt::Long;

# Defaults; overridden first by ~/.rip-cvs and then by command-line options.
my $configfile="$ENV{HOME}/.rip-cvs";
my %config;
$config{'branch'} = "HEAD";
$config{'scmdir'} = "CVS";
$config{'agent'} = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.7; rv:10.0.2) Gecko/20100101 Firefox/10.0.2';
$config{'verbose'}=0;
$config{'checkout'}=1;
$config{'outdir'}='./';
$config{'rlevel'}=9;    # maximum directory recursion depth

# Read "key = value" pairs from the optional config file.
if (-e $configfile) {
    # Best effort: an unreadable config file is skipped. (`next` is not valid
    # here because this is not inside a loop.)
    if (open(my $cfgfh, '<', $configfile)) {
        while (<$cfgfh>) {
            chomp;                  # no newline
            s/#.*//;                # no comments
            s/^\s+//;               # no leading white
            s/\s+$//;               # no trailing white
            next unless length;     # anything left?
            my ($var, $value) = split(/\s*=\s*/, $_, 2);
            $config{$var} = $value;
        }
        close($cfgfh);
    }
}

Getopt::Long::Configure ("bundling");

my $result = GetOptions (
    "a|agent=s" => \$config{'agent'},
    "b|branch=s" => \$config{'branch'},
    "u|url=s" => \$config{'url'},
    "c|checkout!" => \$config{'checkout'},
    "s|sslignore!" => \$config{'sslignore'},
    "v|verbose+" => \$config{'verbose'},
    "h|help" => \&help
);

# Metadata files fetched from every CVS/ directory.
my @scmfiles=(
    "Repository",
    "Root",
    "Entries"
);

if ($config{'verbose'}>3) {
    foreach my $key ( keys %config )
    {
        print "[c] $key => $config{$key}\n";
    }
}

my $ua = LWP::UserAgent->new;
$ua->agent($config{'agent'});

if ($config{'sslignore'}) {
    $ua->ssl_opts(SSL_verify_mode => IO::Socket::SSL::SSL_VERIFY_NONE, verify_hostname => 0);
}

# normalize URL: 'scmurl' points at the CVS directory, 'regurl' at its parent
if ($config{'url'} =~ /\/CVS/) {
    $config{'scmurl'} = $config{'url'};
    $config{'regurl'} = $config{'url'};
    $config{'regurl'} =~ s/\/CVS//;
} else {
    $config{'scmurl'} = $config{'url'}."/CVS";
    $config{'regurl'} = $config{'url'};
}

processcvs ('',$config{'outdir'},0);

# Download the CVS metadata for one directory, print what it lists, and
# recurse into the sub-directories named in its Entries file.
#   $url   - path of the directory relative to $config{'regurl'}
#   $dir   - local directory mirroring it
#   $level - current recursion depth (0 for the starting directory)
sub processcvs {
    my ($url,$dir,$level) = @_;
    createcvsdirs ($dir);
    downloadcvsfiles ($url,$dir);

    return if ($level>$config{'rlevel'});

    my $cntfile = 0;    # how many metadata files were found here

    my $ident=" "x$level;

    if (-e "$dir/$config{'scmdir'}/Root" and $level==0) {
        $cntfile++;
        print "$ident"."[i] CVSROOT=";
        displayfile("$dir/$config{'scmdir'}/Root");
    }

    if (-e "$dir/$config{'scmdir'}/Repository" and $level==0) {
        $cntfile++;
        print "$ident"."[i] cvs checkout ";
        displayfile("$dir/$config{'scmdir'}/Repository");
    }

    if (-e "$dir/$config{'scmdir'}/Entries") {
        $cntfile++;
        my $cont=readfile("$dir/$config{'scmdir'}/Entries");
        # print $cont;
        # print sprintf "%s%1s %-25s %-14s %22s\n", "T", "Name", "Revision", "Date";
        foreach my $line ( split /\n/, $cont ) {
            next unless $line =~ /\//;
            # Entries lines are '/'-separated: type, name, revision, date, ...
            my @rec = split(/\//, $line);
            print sprintf "%s%1s %-38s %-14s %22s\n", $ident, $rec[0], $rec[1], $rec[2], $rec[3];
            next unless $rec[0] eq 'D';
            # The name is remote-controlled: an empty name or "." would
            # recurse into the same directory and ".." would escape the
            # output directory.
            next if !defined $rec[1] or $rec[1] eq '' or $rec[1] =~ /^\.\.?$/;
            mkdir "$dir/$rec[1]";
            processcvs("$url/$rec[1]","$dir/$rec[1]",$level+1);
        }
    }

    if ($level==0) {
        if ($cntfile > 0) {
            print STDERR "$ident"."[i] CVS identified on $config{'url'} by $cntfile guesses\n";
        } else {
            print STDERR "$ident"."[i] CVS not identified, check URL: $config{'url'}\n";
        }
    }

}


# Print the content of $file to STDOUT; warns and returns if it cannot be opened.
sub displayfile {
    my ($file) = @_;
    open (my $fh, '<', $file) or do {
        warn ("cannot open $file: $!");
        return;
    };
    while (my $line = <$fh>) {
        print $line;
    }
    close ($fh);
}

# Return the whole content of $file as one string ('' if it cannot be opened).
sub readfile {
    my ($file) = @_;
    my $str = '';
    open (my $fh, '<', $file) or do {
        warn ("cannot open $file: $!");
        return ($str);
    };
    while (my $line = <$fh>) {
        $str=$str.$line;
    }
    close ($fh);
    # print ":$str:\n";
    return ($str);
}

# Create the local CVS metadata directory under $dir.
sub createcvsdirs {
    my ($dir) = @_;
    mkdir $dir."/CVS";
}

# Fetch every metadata file of the CVS directory at $url into $dir.
sub downloadcvsfiles {
    my ($url,$dir) = @_;
    foreach my $file (@scmfiles) {
        my $furl = "$url/$config{'scmdir'}/$file";
        getfile($furl,"$dir/$config{'scmdir'}/$file");
    }
}

# GET $config{'regurl'}/$file, save the body to $outfile on success, and
# return the HTTP::Response.
sub getfile {
    my ($file,$outfile) = @_;
    my $furl = $config{'regurl'}."/".$file;
    my $req = HTTP::Request->new(GET => $furl);
    # Pass request to the user agent and get a response back
    my $res = $ua->request($req);
    if ($res->is_success) {
        print STDERR "[d] found $file\n" if ($config{'verbose'}>1);
        open (my $out, '>', $outfile) or die ("cannot open file '$outfile': $!");
        binmode($out);
        print $out $res->content;
        close ($out);
    } else {
        print STDERR "[!]
Not found for $furl => $file: ".$res->status_line."\n"
            if ($config{'verbose'}>1);
    }
    return $res;
}

# Print usage and exit.
sub help {
    print "DVCS-Ripper: rip-cvs.pl. Copyright (C) Kost. Distributed under GPL.\n\n";
    print "Usage: $0 [options] -u [url] \n";
    print "\n";
    print " -c perform 'checkout' on end (default)\n";
    print " -b Use branch (default: $config{'branch'})\n";
    print " -a Use agent (default: $config{'agent'})\n";
    print " -s ignore SSL certification verification\n";
    print " -v verbose (-vv will be more verbose)\n";
    print "\n";

    print "Example: $0 -v -u http://www.example.com/CVS/\n";
    print "Example: $0 # with url and options in $configfile\n";

    exit 0;
}

-------------------------------------------------------------------------------- /rip-git.pl: --------------------------------------------------------------------------------
#!/usr/bin/perl

use strict;

use Cwd;
use IPC::SysV qw(IPC_PRIVATE S_IRWXU IPC_CREAT SEM_UNDO ftok);
use IPC::Semaphore;
use IPC::SharedMem;

use IO::Socket::SSL;
use LWP;
use LWP::UserAgent;
use HTTP::Request;
use HTTP::Response;
use Getopt::Long;

use Digest::SHA qw(sha1 sha1_hex);

# Defaults; overridden first by ~/.rip-git and then by command-line options.
my $configfile="$ENV{HOME}/.rip-git";
my %config;
$config{'branch'} = "master";
$config{'gitdir'} = ".git";
$config{'agent'} = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.7; rv:10.0.2) Gecko/20100101 Firefox/10.0.2';
$config{'verbose'}=0;
$config{'checkout'}=1;

$config{'redirects'}=0;

# Tunables for detecting servers that answer 200 for missing files.
$config{'respdetectmax'}=3;
$config{'resp404size'}=256;
$config{'resp404reqsize'}=32;

# File-name prefix of pack files: <basename>-<digest>.pack / .idx
$config{'gitpackbasename'}='pack';

# Return a random string of $num ASCII letters.
sub randomstr {
    my($num) = @_;
    my @chars = ("A".."Z", "a".."z");
    my $string;
    $string .= $chars[rand @chars] for 1..$num;
    return $string;
}

# Read "key = value" pairs from the optional config file.
if (-e $configfile) {
    # Best effort: an unreadable config file is skipped. (`next` is not valid
    # here because this is not inside a loop.)
    if (open(my $cfgfh, '<', $configfile)) {
        while (<$cfgfh>) {
            chomp;                  # no newline
            s/#.*//;                # no comments
            s/^\s+//;               # no leading white
            s/\s+$//;               # no trailing white
            next unless length;     # anything left?
            my ($var, $value) = split(/\s*=\s*/, $_, 2);
            $config{$var} = $value;
        }
        close($cfgfh);
    }
}

Getopt::Long::Configure ("bundling");

my $result = GetOptions (
    "a|agent=s" => \$config{'agent'},
    "b|branch=s" => \$config{'branch'},
    "c|checkout!" => \$config{'checkout'},
    "e|redis=s" => \$config{'redis'},
    "g|guess" => \$config{'intguess'},
    "k|session=s" => \$config{'session'},
    "n|newer" => \$config{'newer'},
    "m|mkdir" => \$config{'mkdir'},
    "o|output=s" => \$config{'output'},
    "p|proxy=s" => \$config{'proxy'},
    "r|redirects=i" => \$config{'redirects'},
    "s|sslignore!" => \$config{'sslignore'},
    "t|tasks=i" => \$config{'tasks'},
    "u|url=s" => \$config{'url'},
    "x|brute" => \$config{'brute'},
    "v|verbose+" => \$config{'verbose'},
    "ba|basicauth=s" => \$config{'basicauth'},
    "h|help" => \&help
);

# Files with fixed names directly inside a .git directory.
my @gitfiles=(
    "COMMIT_EDITMSG",
    "config",
    "description",
    "HEAD",
    "index",
    "packed-refs"
);

my $cwd=cwd();
my $urldir=$config{'url'};
$urldir=~s#[;:&~/]#_#ig;

# Switch to the output directory; failing silently would scatter the
# download over the current directory, so a failed chdir is fatal.
if ($config{'output'}) {
    if ($config{'mkdir'}) {
        mkdir $config{'output'}."/".$urldir;
        chdir $config{'output'}."/".$urldir
            or die "cannot chdir to $config{'output'}/$urldir: $!";
    } else {
        chdir $config{'output'}
            or die "cannot chdir to $config{'output'}: $!";
    }
}

my @commits;
my $ua = LWP::UserAgent->new;

$ua->agent($config{'agent'});
$ua->max_redirect($config{'redirects'});
if($config{'basicauth'}) {
    my $key = sprintf '%s %s', "Basic", $config{'basicauth'};
    $ua->default_header('Authorization' => $key);
}


if ($config{'sslignore'}) {
    $ua->ssl_opts(SSL_verify_mode => IO::Socket::SSL::SSL_VERIFY_NONE, verify_hostname => 0);
}
if ($config{'proxy'}) {
    # for socks proxy make sure you have LWP::Protocol::socks
    $ua->proxy(['http', 'https'], $config{'proxy'});
}

my $gd=$config{'gitdir'}."/";

mkdir $gd;

print STDERR "[i] Downloading git files from $config{'url'}\n" if ($config{'verbose'}>0);

if ($config{'verbose'}>2) {
    print STDERR "[i] Using agent: $config{'agent'}\n";
    print STDERR "[i] Using redirects: $config{'redirects'}\n";
    print STDERR "[i] Using proxy: $config{'proxy'}\n";
}

# Probe with random file names: a server that answers 200 for them serves
# "soft" 404 pages, which must then be recognised by their content.
my @resp404;
my $respdetectmax=$config{'respdetectmax'};
print STDERR "[i] Auto-detecting 404 as 200 with $config{'respdetectmax'} requests\n" if ($config{'verbose'}>0);
$config{'resp404correct'}=0;
for (my $i=0; $i<$respdetectmax;$i++) {
    my $resp=getreq(randomstr($config{'resp404reqsize'}));
    if ($resp->is_success) {
        push @resp404, $resp;
    } else {
        $config{'resp404correct'}=1;
        last; # exit loop
    }
}

if ($config{'resp404correct'}) {
    print STDERR "[i] Getting correct 404 responses\n" if ($config{'verbose'}>0);
} else {
    print STDERR "[i] Getting 200 as 404 responses. Adapting...\n" if ($config{'verbose'}>0);
    # The first probe's leading bytes become the soft-404 fingerprint; the
    # other probes are only compared against it to warn about variation.
    # (@resp404 is empty when respdetectmax is 0.)
    my $fingerprint = @resp404
        ? substr($resp404[0]->content,0,$config{'resp404size'})
        : '';
    foreach my $entry (@resp404) {
        my $chopresp=substr($entry->content,0,$config{'resp404size'});
        if ($fingerprint ne $chopresp) {
            print STDERR "[i] 404 responses are different, you will have to customize script source code\n";
            last; # exit loop
        }
    }
    $config{'resp404content'}=$fingerprint;
}

unless ($config{'session'}) {
    $config{'session'}=randomstr(8);
}

print STDERR "[i] Using session name: $config{'session'}\n";

# Optional modules: loaded at run time so the script works without them.
my $haveredis = eval
{
    require Redis;
    Redis->import();
    1;
};

my $havealg = eval {
    require Algorithm::Combinatorics;
    Algorithm::Combinatorics->import(qw(variations_with_repetition permutations));
    1;
};

if ($config{'redis'}) {
    if ($haveredis) {
        if ($ENV{'REDIS_PORT_6379_TCP_ADDR'}) {
            print STDERR "[i] Detected redis docker environment variable, overriding: $config{'redis'}\n";
            $config{'redis'}=$ENV{'REDIS_PORT_6379_TCP_ADDR'};
        }
        print STDERR "[i] Using redis: $config{'redis'}\n";
        $config{'redisobj'} = Redis->new(server => $config{'redis'});
        # Redis hashes caching which object ids were found / not found.
        $config{'redis-good'} = $config{'session'}."-good";
        $config{'redis-bad'} = $config{'session'}."-bad";
    } else {
        print STDERR "[i] Please install Perl Redis module\n";
    }
}


foreach my $file (@gitfiles) {
    getfile($file,$gd.$file);
}

mkdir $gd."logs";
mkdir $gd."logs/refs";
mkdir $gd."logs/refs/heads";
mkdir $gd."logs/refs/remotes";

mkdir $gd."objects";
mkdir $gd."objects/info";
mkdir $gd."objects/pack";

getfile("objects/info/alternates",$gd."objects/info/alternates");

mkdir $gd."info";
getfile("info/grafts",$gd."info/grafts");

# Seed the object store with the second field of each logs/HEAD line.
my $res = getfile("logs/HEAD",$gd."logs/HEAD");

if ($res->is_success) {
    my @lines = split /\n/, $res->content;
    foreach my $line (@lines) {
        my @fields=split(/\s+/, $line);
        my $ref = $fields[1];
        getobject($gd,$ref);    # getobject rejects anything that is not a hex id
    }
}

mkdir $gd."refs";
mkdir $gd."refs/heads";
$res = getfile("refs/heads/".$config{'branch'},$gd."refs/heads/".$config{'branch'});
mkdir $gd."refs/remotes";
mkdir $gd."refs/tags";

# process packs file: objects/info/packs
my $infopacks='objects/info/packs';
$res=getrealreq($infopacks);
if ($res->is_success) {
    print STDERR "[!] found info file for packs, trying to process them: $infopacks\n" if ($config{'verbose'}>0);
    writefile($gd.$infopacks,$res->content);
    my @items=split("\n",$res->content);
    foreach my $item (@items) {
        print STDERR "[d] processing packs entry: $item\n" if ($config{'verbose'}>1);
        my ($imark,$ifile) = split(" ",$item);
        # The file name is remote-controlled and becomes a local path:
        # accept plain "<name>.pack" only.
        next unless defined $ifile && $ifile =~ /\A[\w-]+\.pack\z/;
        my $packfn="objects/pack/$ifile";
        getfile($packfn,$gd.$packfn);
        $packfn=~s/\.pack$/.idx/g;
        getfile($packfn,$gd.$packfn);
    }
}

# Parallel Tasks magic
my $haveppf = eval
{
    require Parallel::ForkManager;
    Parallel::ForkManager->import();
    1;
};
my $pm;
my $sem;
my $shm;
my $shmsize=16;
if ($config{'tasks'}>0) {
    if ($haveppf) {
        $pm = Parallel::ForkManager->new($config{'tasks'});
        $sem = IPC::Semaphore->new( ftok( $0, 0 ), 1, S_IRWXU | IPC_CREAT );
        if ($sem) {
            $sem->setval(0,0);
            $shm = IPC::SharedMem->new(IPC_PRIVATE, 16, S_IRWXU);
        } else {
            die("Error creating IPC Semaphore: $!\n");
        }
        print STDERR "[i] Using $config{'tasks'} parallel tasks\n" if ($config{'verbose'}>0);

    } else {
        print STDERR "[!] Please install Parallel::ForkManager CPAN module for parallel requests\n";
        $config{'tasks'}=0;
    }
}

# Repeatedly ask "git fsck" what is missing and fetch it, until a pass
# fetches nothing new. With parallel tasks the children count their
# successes in the shared memory segment.
my $pcount=1;
my $fcount=0;
while ($pcount>0) {
    print STDERR "[i] Running git fsck to check for missing items\n" if ($config{'verbose'}>0);
    open(my $fsck, "git fsck |") or die "cannot find git: $!";
    $pcount=0;
    $fcount=0;
    if ($config{'tasks'}>0) {
        $sem->setval(0,0);
        $shm->write($fcount,0,$shmsize);
    }
    while (my $line = <$fsck>) {
        chomp $line;
        next unless $line =~ /^missing/;
        my @getref = split (/\s+/, $line);
        $pcount++;
        if ($config{'tasks'}>0) {
            $pm->start() and next;
            my $res = getobject($gd,$getref[2]); # 3rd field is sha1
            if ($res->is_success) {
                # NOTE(review): incrementing before decrementing does not
                # exclude other children, so concurrent updates of the
                # counter can be lost - confirm whether an exact count matters.
                $sem->op( 0, 1, SEM_UNDO );
                $fcount=$shm->read(0, $shmsize);
                $shm->write($fcount+1,0,$shmsize);
                $sem->op( 0, -1, SEM_UNDO );
            }
            $pm->finish;
        } else {
            my $res = getobject($gd,$getref[2]); # 3rd field is sha1
            if ($res->is_success) {
                $fcount++;
            }
        }
    }
    if ($config{'tasks'}>0) {
        print STDERR "[i] Waiting for children to finish\n" if ($config{'verbose'}>0);
        $pm->wait_all_children();
        $fcount = $shm->read(0, $shmsize);
    }
    close($fsck);
    print STDERR "[i] Got items with git fsck: $pcount, Items fetched: $fcount\n" if ($config{'verbose'}>0);
    if ($fcount == 0) {
        print STDERR "[!] No more items to fetch. That's it!\n";
        last;
    }
}

if ($config{'intguess'}) {
    intguess();
}

if ($config{'brute'}) {
    bruteguess();
}

if ($config{'redisobj'}) {
    print STDERR "[i] Closing redis connection\n" if ($config{'verbose'}>0);
    $config{'redisobj'}->quit;
}

if ($config{'checkout'}) {
    system("git checkout -f");
}

if ($config{'output'}) {
    chdir $cwd;
}

# Try every possible 40-digit hex digest as a pack name (practically endless).
sub bruteguess {
    print STDERR "[!] Performing pure brute force guessing of packed refs\n";
    unless ($havealg) {
        print STDERR "[!] Please install Algorithm::Combinatorics CPAN module for guessing\n";
        return;
    }
    my $pmb;
    my @digestchars=qw(0 1 2 3 4 5 6 7 8 9 a b c d e f);
    my $iter = variations_with_repetition(\@digestchars, 40);
    if ($config{'tasks'}>0) {
        if ($haveppf) {
            $pmb = Parallel::ForkManager->new($config{'tasks'});
        }
    }
    while (my $c = $iter->next) {
        my $p = join('', @{$c});
        print STDERR "[i] Brute forcing digest item: $p \n" if ($config{'verbose'}>0);
        if ($config{'tasks'}>0) {
            $pmb->start() and next;
            getpackedref($p);
            $pmb->finish;
        } else {
            getpackedref($p);
        }
    }
    if ($config{'tasks'}>0) {
        print STDERR "[i] Waiting for children to finish\n" if ($config{'verbose'}>0);
        $pmb->wait_all_children();
    }
    print STDERR "[!] Finished brute force guessing of packed refs. Does world still exists? :)\n";
}

# get packed refs from given digest: <gitpackbasename>-<digest>.pack and .idx
sub getpackedref {
    my ($digest) = @_;

    my $base="objects/pack/".$config{'gitpackbasename'}."-".$digest;
    getfile($base.".pack",$gd.$base.".pack");
    getfile($base.".idx",$gd.$base.".idx");
}

# calculate possible digest from array of digests and try it as a pack name
sub getintitem {
    my ($p) = @_;

    my $sha = Digest::SHA->new(1); # use SHA-1
    foreach my $item (@{$p}) {
        $sha->add($item."\n");
    }
    my $digestguess=$sha->hexdigest();
    getpackedref($digestguess);
}

# try to intelligently guess packed refs: hash every ordering of the object
# ids recorded as missing in the redis "bad" hash
sub intguess {
    print STDERR "[!] Performing intelligent guessing of packed refs\n";
    unless ($havealg) {
        print STDERR "[!] Please install Algorithm::Combinatorics CPAN module for guessing\n";
        return;
    }
    unless ($config{'redisobj'}) {
        print STDERR "[!] Intelligent guessing needs the redis cache of missing items (use -e)\n";
        return;
    }
    # The missing ids are the field names of the "bad" hash, not the hash
    # name itself.
    my @missingitems = $config{'redisobj'}->hkeys($config{'redis-bad'});
    my $iter = permutations(\@missingitems);
    my $pmg;
    if ($config{'tasks'}>0) {
        if ($haveppf) {
            $pmg = Parallel::ForkManager->new($config{'tasks'});
        }
    }
    while (my $p = $iter->next) {
        print STDERR "[i] Guessing item from permutations\n" if ($config{'verbose'}>0);
        if ($config{'tasks'}>0) {
            $pmg->start() and next;
            getintitem($p);
            $pmg->finish;
        } else {
            getintitem($p);
        }
    }
    if ($config{'tasks'}>0) {
        print STDERR "[i] Waiting for children to finish\n" if ($config{'verbose'}>0);
        $pmg->wait_all_children();
    }
    print STDERR "[!] Finished intelligent guessing of packed refs\n";
}

# Fetch the loose object $ref into $gd/objects/xx/yyyy..., consulting and
# updating the redis cache when one is configured.
# Returns an HTTP::Response (synthetic for cache hits and rejected refs).
sub getobject {
    my ($gd,$ref) = @_;
    # $ref may come from remote data and becomes a file path: hex ids only.
    unless (defined $ref && $ref =~ /\A[0-9a-f]{40}(?:[0-9a-f]{24})?\z/) {
        return HTTP::Response->new(400);
    }
    my $rdir = substr ($ref,0,2);
    my $rfile = substr ($ref,2);
    my $redisc;
    if ($config{'redisobj'}) {
        # own connection per call, so forked children do not share a socket
        $redisc = Redis->new(server => $config{'redis'});
        if ($redisc->hexists($config{'redis-bad'},$ref)) {
            $redisc->quit;
            return HTTP::Response->new(404);
        }
        if ($redisc->hexists($config{'redis-good'},$ref)) {
            $redisc->quit;
            return HTTP::Response->new(200);
        }
        print STDERR "[!] Not found in redis cache: $ref\n" if ($config{'verbose'}>1);
    }
    mkdir $gd."objects/$rdir";
    my $r=getfile("objects/$rdir/$rfile",$gd."objects/$rdir/$rfile");
    if ($config{'redisobj'}) {
        if ($r->is_success) {
            $redisc->hset($config{'redis-good'}, $ref, 200);
        } else {
            $redisc->hset($config{'redis-bad'}, $ref, 404);
        }
        $redisc->quit;
    }
    return $r;
}

# GET $config{'url'}/$file and return the HTTP::Response, without saving it.
sub getreq {
    my ($file) = @_;
    my $furl = $config{'url'}."/".$file;
    my $req = HTTP::Request->new(GET => $furl);
    # Pass request to the user agent and get a response back
    my $res = $ua->request($req);
    return $res;
}

# Like getreq, but a 200 whose body matches the soft-404 fingerprint is
# turned into a synthetic 404 response.
sub getrealreq {
    my ($file) = @_;
    my $res = getreq($file);
    if ($res->is_success) {
        if (not $config{'resp404correct'}) {
            print STDERR "[d] got 200 for packs but checking content\n" if ($config{'verbose'}>1);
            my $chopresp=substr($res->content,0,$config{'resp404size'});
            if ($chopresp eq $config{'resp404content'}) {
                print STDERR "[!] Not found for: 404 as 200\n"
                    if ($config{'verbose'}>0);
                # return not found
                my $r = HTTP::Response->new(404);
                # $r = HTTP::Response->new( $code, $msg, $header, $content )
                return $r;
            }
        }
    }
    return $res;
}

# Write $content to $file; returns undef if the file cannot be opened.
sub writefile {
    my ($file, $content) = @_;
    open(my $fh, '>', $file) or return undef;
    binmode($fh);
    print $fh $content;
    close $fh;
}

# GET $config{'url'}/$file and save the body to $outfile.
# With -n an existing $outfile is kept and reported as a synthetic 200.
# Returns an HTTP::Response; a soft 404 is reported as a synthetic 404.
sub getfile {
    my ($file,$outfile) = @_;
    if ($config{'newer'}) {
        if (-e $outfile) {
            print STDERR "[!] Not overwriting file: $outfile\n" if ($config{'verbose'}>0);
            my $r = HTTP::Response->new(200);
            return $r;
        }
    }
    my $furl = $config{'url'}."/".$file;
    my $req = HTTP::Request->new(GET => $furl);
    # Pass request to the user agent and get a response back
    my $res = $ua->request($req);
    if ($res->is_success) {
        if (not $config{'resp404correct'}) {
            print STDERR "[d] got 200 for $file, but checking content\n" if ($config{'verbose'}>1);
            my $chopresp=substr($res->content,0,$config{'resp404size'});
            if ($chopresp eq $config{'resp404content'}) {
                print STDERR "[!] Not found for $file: 404 as 200\n"
                    if ($config{'verbose'}>0);
                my $r = HTTP::Response->new(404);
                return $r;
            }
        }
        print STDERR "[d] found $file\n" if ($config{'verbose'}>0);
        open (my $out, '>', $outfile) or die ("cannot open file: $!");
        binmode($out);    # objects and packs are binary
        print $out $res->content;
        close ($out);
    } else {
        print STDERR "[!] Not found for $file: ".$res->status_line."\n"
            if ($config{'verbose'}>0);
    }
    return $res;
}

# Print usage and exit.
sub help {
    print "DVCS-Ripper: rip-git.pl. Copyright (C) Kost.
Distributed under GPL.\n\n";
    print "Usage: $0 [options] -u [giturl] \n";
    print "\n";
    print " -c perform 'git checkout -f' on end (default)\n";
    print " -b Use branch (default: $config{'branch'})\n";
    print " -e Use redis server as server:port\n";
    print " -g Try to inteligently guess name of packed refs\n";
    print " -k Use session name for redis (default: random)\n";
    print " -a Use agent (default: $config{'agent'})\n";
    print " -n do not overwrite files\n";
    print " -m mkdir URL name when outputting (works good with -o)\n";
    print " -o specify output dir\n";
    print " -r specify max number of redirects (default: $config{'redirects'})\n";
    print " -s do not verify SSL cert\n";
    print " -t use parallel tasks\n";
    print " -p use proxy for connections\n";
    print " -x brute force packed refs (extremely slow!!)\n";
    print " -v verbose (-vv will be more verbose)\n";
    print " -ba set basic auth key\n";
    print "\n";
    print "Example: $0 -v -u http://www.example.com/.git/\n";
    print "Example: $0 # with url and options in $configfile\n";
    print "Example: $0 -v -u -p socks://localhost:1080 http://www.example.com/.git/\n";
    print "For socks like proxy, make sure you have LWP::Protocol::socks\n";

    exit 0;
}
-------------------------------------------------------------------------------- /rip-hg.pl: --------------------------------------------------------------------------------
#!/usr/bin/perl

use strict;

use IO::Socket::SSL;
use LWP;
use LWP::UserAgent;
use HTTP::Request;
use HTTP::Response;
use Getopt::Long;

use File::Path qw(make_path);
use File::Basename;


# Defaults; overridden first by ~/.rip-hg and then by command-line options.
my $configfile="$ENV{HOME}/.rip-hg";
my %config;
$config{'hgdir'} = ".hg";
$config{'agent'} = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.7; rv:10.0.2) Gecko/20100101 Firefox/10.0.2';
$config{'verbose'}=0;
$config{'checkout'}=1;

# Tunables for detecting servers that answer 200 for missing files.
$config{'respdetectmax'}=3;
$config{'resp404size'}=256;
$config{'resp404reqsize'}=32;

# Return a random string of $num ASCII letters.
sub randomstr {
    my($num) = @_;
    my @chars = ("A".."Z", "a".."z");
    my $string;
    $string .= $chars[rand @chars] for 1..$num;
    return $string;
}

# Read "key = value" pairs from the optional config file.
if (-e $configfile) {
    # Best effort: an unreadable config file is skipped. (`next` is not valid
    # here because this is not inside a loop.)
    if (open(my $cfgfh, '<', $configfile)) {
        while (<$cfgfh>) {
            chomp;                  # no newline
            s/#.*//;                # no comments
            s/^\s+//;               # no leading white
            s/\s+$//;               # no trailing white
            next unless length;     # anything left?
            my ($var, $value) = split(/\s*=\s*/, $_, 2);
            $config{$var} = $value;
        }
        close($cfgfh);
    }
}

Getopt::Long::Configure ("bundling");

my $result = GetOptions (
    "a|agent=s" => \$config{'agent'},
    "b|branch=s" => \$config{'branch'},
    "u|url=s" => \$config{'url'},
    "p|proxy=s" => \$config{'proxy'},
    "c|checkout!" => \$config{'checkout'},
    "s|sslignore!" => \$config{'sslignore'},
    "v|verbose+" => \$config{'verbose'},
    "h|help" => \&help
);

# Files with fixed names inside a .hg directory.
my @knownfiles=(
    '00changelog.i',
    'dirstate',
    'requires',
    'branch',
    'branchheads.cache',
    'last-message.txt',
    'tags.cache',
    'undo.branch',
    'undo.desc',
    'undo.dirstate',
    'store/00changelog.i',
    'store/00changelog.d',
    'store/00manifest.i',
    'store/00manifest.d',
    'store/fncache',
    'store/undo',
    '.hgignore'
);

my $ua = LWP::UserAgent->new;

$ua->agent($config{'agent'});

if ($config{'sslignore'}) {
    $ua->ssl_opts(SSL_verify_mode => IO::Socket::SSL::SSL_VERIFY_NONE, verify_hostname => 0);
}
if ($config{'proxy'}) {
    # for socks proxy make sure you have LWP::Protocol::socks
    $ua->proxy(['http', 'https'], $config{'proxy'});
}

my $ddir=$config{'hgdir'}."/";

mkdir $ddir;
mkdir $ddir."store";
mkdir $ddir."store/data";

print STDERR "[i] Downloading hg files from $config{'url'}\n" if ($config{'verbose'}>0);

# Probe with random file names: a server that answers 200 for them serves
# "soft" 404 pages, which must then be recognised by their content.
my @resp404;
my $respdetectmax=$config{'respdetectmax'};
print STDERR "[i] Auto-detecting 404 as 200 with $config{'respdetectmax'} requests\n" if ($config{'verbose'}>0);
$config{'resp404correct'}=0;
for (my $i=0; $i<$respdetectmax;$i++) {
    my $resp=getreq(randomstr($config{'resp404reqsize'}));
    if ($resp->is_success) {
        push @resp404, $resp;
    } else {
        $config{'resp404correct'}=1;
        last; # exit loop
    }
}

if ($config{'resp404correct'}) {
    print STDERR "[i] Getting correct 404 responses\n";
} else {
    print STDERR "[i] Getting 200 as 404 responses. Adapting...\n";
    # The first probe's leading bytes become the soft-404 fingerprint; the
    # other probes are only compared against it to warn about variation.
    # (@resp404 is empty when respdetectmax is 0.)
    my $fingerprint = @resp404
        ? substr($resp404[0]->content,0,$config{'resp404size'})
        : '';
    foreach my $entry (@resp404) {
        my $chopresp=substr($entry->content,0,$config{'resp404size'});
        if ($fingerprint ne $chopresp) {
            print STDERR "[i] 404 responses are different, you will have to customize script source code\n";
            last; # exit loop
        }
    }
    $config{'resp404content'}=$fingerprint;
}

foreach my $file (@knownfiles) {
    getfile($file,$ddir.$file);
}

# "hg status -A" prints one "<status> <file name>" line per tracked file.
print STDERR "[i] Running hg status to check for missing items\n" if ($config{'verbose'}>0);
my @repfiles;
open(my $pipe, "hg status -A |") or die "cannot find hg: $!";
while (my $line = <$pipe>) {
    chomp $line;
    # Split off the status only, so file names containing spaces survive.
    my ($status, $name) = split(' ', $line, 2);
    next unless defined $name && length $name;
    # Names derive from remote data and become local paths.
    next if $name =~ m{^/} || $name =~ m{(?:^|/)\.\.(?:/|$)};
    push @repfiles, $name;
}
close($pipe);
my $maxfiles=scalar(@repfiles);
print STDERR "[i] Got items with hg status: $maxfiles\n" if ($config{'verbose'}>0);


# Fetch the ".d" and ".i" store files of every tracked file; a file counts
# as fetched when its ".i" was retrieved.
my $numfiles=0;
foreach my $file (@repfiles) {
    my($filename, $dirs, $suffix) = fileparse($file);
    my $rpath="store/data/".$file;
    make_path($ddir."store/data/".$dirs);
    getfile($rpath.".d",$ddir.$rpath.".d");
    my $res=getfile($rpath.".i",$ddir.$rpath.".i");
    if ($res->is_success) {
        if ($config{'checkout'}) {
            # list form: the file name never passes through a shell
            system("hg", "revert", $file);
        }
        $numfiles++;

    }
}

print STDERR "[i] Finished ($numfiles of $maxfiles)\n";

# -- END

# GET $config{'url'}/$file and return the HTTP::Response, without saving it.
sub getreq {
    my ($file) = @_;
    my $furl = $config{'url'}."/".$file;
    my $req = HTTP::Request->new(GET => $furl);
    # Pass request to the user agent and get a response back
    my $res = $ua->request($req);
    return $res;
}

# GET $config{'url'}/$file and save the body to $outfile.
# Returns the HTTP::Response; a 200 whose body matches the soft-404
# fingerprint is reported as a synthetic 404 so callers do not count it as
# fetched.
sub getfile {
    my ($file,$outfile) = @_;
    my $furl = $config{'url'}."/".$file;
    my $req = HTTP::Request->new(GET => $furl);
    # Pass request to the user agent and get a response back
    my $res = $ua->request($req);
    if ($res->is_success) {
        if (not $config{'resp404correct'}) {
            print STDERR "[d] got 200 for $file, but checking content\n" if ($config{'verbose'}>1);
            my $chopresp=substr($res->content,0,$config{'resp404size'});
            if ($chopresp eq $config{'resp404content'}) {
                print STDERR "[!] Not found for $file: 404 as 200\n"
                    if ($config{'verbose'}>0);
                return HTTP::Response->new(404);
            }
        }
        print STDERR "[d] found $file\n" if ($config{'verbose'}>0);
        open (my $out, '>', $outfile) or die ("cannot open file $outfile: $!");
        binmode($out);
        print $out $res->content;
        close ($out);
    } else {
        print STDERR "[!] Not found for $file: ".$res->status_line."\n"
            if ($config{'verbose'}>0);
    }
    return $res;
}

# Print usage and exit.
sub help {
    print "DVCS-Ripper: rip-hg.pl. Copyright (C) Kost.
Distributed under GPL.\n\n"; 209 | print "Usage: $0 [options] -u [hgurl] \n"; 210 | print "\n"; 211 | print " -c perform 'hg revert' on end (default)\n"; 212 | print " -b Use branch (default: $config{'branch'})\n"; 213 | print " -a Use agent (default: $config{'agent'})\n"; 214 | print " -s do not verify SSL cert\n"; 215 | print " -p use proxy for connections\n"; 216 | print " -v verbose (-vv will be more verbose)\n"; 217 | print "\n"; 218 | print "Example: $0 -v -u http://www.example.com/.hg/\n"; 219 | print "Example: $0 # with url and options in $configfile\n"; 220 | print "Example: $0 -v -u -p socks://localhost:1080 http://www.example.com/.hg/\n"; 221 | print "For socks like proxy, make sure you have LWP::Protocol::socks\n"; 222 | 223 | exit 0; 224 | } 225 | 226 | -------------------------------------------------------------------------------- /rip-svn.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl 2 | 3 | use strict; 4 | 5 | use IO::Socket::SSL; 6 | use LWP; 7 | use DBI; 8 | use LWP::UserAgent; 9 | use HTTP::Request; 10 | use Getopt::Long; 11 | 12 | my $configfile="$ENV{HOME}/.rip-svn"; 13 | my %config; 14 | $config{'branch'} = "trunk"; 15 | $config{'scmdir'} = ".svn"; 16 | $config{'agent'} = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.7; rv:10.0.2) Gecko/20100101 Firefox/10.0.2'; 17 | $config{'verbose'}=0; 18 | $config{'checkout'}=1; 19 | $config{'outdir'}='./'; 20 | $config{'upgrade'}=1; 21 | 22 | if (-e $configfile) { 23 | open(CONFIG,"<$configfile") or next; 24 | while () { 25 | chomp; # no newline 26 | s/#.*//; # no comments 27 | s/^\s+//; # no leading white 28 | s/\s+$//; # no trailing white 29 | next unless length; # anything left? 
30 | my ($var, $value) = split(/\s*=\s*/, $_, 2); 31 | $config{$var} = $value; 32 | } 33 | close(CONFIG); 34 | } 35 | 36 | Getopt::Long::Configure ("bundling"); 37 | 38 | my $result = GetOptions ( 39 | "a|agent=s" => \$config{'agent'}, 40 | "b|branch=s" => \$config{'branch'}, 41 | "u|url=s" => \$config{'url'}, 42 | "c|checkout!" => \$config{'checkout'}, 43 | "s|sslignore!" => \$config{'sslignore'}, 44 | "v|verbose+" => \$config{'verbose'}, 45 | "h|help" => \&help 46 | ); 47 | 48 | my @scmfiles=( 49 | "all-wcprops", 50 | "entries", 51 | "format", 52 | "wc.db" 53 | ); 54 | 55 | if ($config{'verbose'}>3) { 56 | foreach my $key ( keys %config ) 57 | { 58 | print "$key => $config{$key}\n"; 59 | } 60 | } 61 | 62 | my @commits; 63 | my $ua = LWP::UserAgent->new; 64 | $ua->agent($config{'agent'}); 65 | 66 | if ($config{'sslignore'}) { 67 | $ua->ssl_opts(SSL_verify_mode => IO::Socket::SSL::SSL_VERIFY_NONE, verify_hostname => 0); 68 | } 69 | 70 | # normalize URL 71 | if ($config{'url'} =~ /\/\.svn/) { 72 | $config{'scmurl'} = $config{'url'}; 73 | $config{'regurl'} = $config{'url'}; 74 | $config{'regurl'} =~ s/\/\.svn//; 75 | } else { 76 | $config{'scmurl'} = $config{'url'}."/.svn"; 77 | $config{'regurl'} = $config{'url'}; 78 | } 79 | 80 | createsvndirs($config{'outdir'}); 81 | downloadsvnfiles('',$config{'outdir'}); 82 | 83 | if (-e "$config{'scmdir'}/wc.db") { 84 | print STDERR "[i] Found new SVN client storage format!\n"; 85 | my $dbh = DBI->connect("dbi:SQLite:dbname=.svn/wc.db","",""); 86 | 87 | my $sqlr = 'SELECT id,root,uuid FROM repository'; 88 | my $sth = $dbh->prepare($sqlr) or warn "Couldn't prepare statement '$sqlr': " . $dbh->errstr; 89 | $sth->execute(); 90 | while (my $record = $sth->fetchrow_hashref()) { 91 | print "REP INFO => $record->{'id'}:$record->{'root'}:$record->{'uuid'}\n"; 92 | } 93 | 94 | my $sqlp = "select checksum,compression,md5_checksum from pristine"; 95 | my $sthp = $dbh->prepare($sqlp) or warn "Couldn't prepare statement '$sqlp': " . 
$dbh->errstr; 96 | $sthp->execute(); 97 | while (my $record = $sthp->fetchrow_hashref()) { 98 | print "REC INFO => $record->{'checksum'}:$record->{'compression'}:$record->{'checksum_md5'}\n" if ($config{'verbose'}>1);; 99 | if ($record->{'checksum'} =~ /\$sha1\$/) { 100 | my $nfile=substr ($record->{'checksum'},6); 101 | getobject("$config{'outdir'}/$config{'scmdir'}",$nfile); 102 | } else { 103 | warn("Unknown checksum: $record->{'checksum'}"); 104 | } 105 | } 106 | $dbh->disconnect; 107 | checkout(); 108 | 109 | } else { 110 | if (-e "$config{'scmdir'}/entries") { 111 | print STDERR "[i] Found old SVN client storage format!\n"; 112 | svnentries('',$config{'outdir'}); 113 | if ($config{'checkout'} and $config{'upgrade'}) { 114 | print STDERR "[i] Running upgrade, if you get errors, ignore if using older client\n"; 115 | system("svn upgrade"); 116 | } 117 | checkout(); 118 | print STDERR "[i] Due to limitations, to get full tree - run this utility few times!\n"; 119 | } else { 120 | print STDERR "[i] Could not identify SVN format. 
Are you sure it's SVN there?\n"; 121 | print STDERR "[i] Anyway, take a look at ".$config{'scmurl'}."/"."entries\n"; 122 | } 123 | } 124 | 125 | sub checkout { 126 | if ($config{'checkout'}) { 127 | print STDERR "[i] Trying to revert the tree, if you get error, upgrade your SVN client!\n"; 128 | system("svn revert -R ."); 129 | } 130 | } 131 | 132 | sub createsvndirs { 133 | my ($dir) = @_; 134 | mkdir $dir."/.svn"; 135 | mkdir $dir."/.svn/text-base"; 136 | mkdir $dir."/.svn/pristine"; 137 | mkdir $dir."/.svn/tmp"; 138 | } 139 | 140 | sub downloadsvnfiles { 141 | my ($url,$dir) = @_; 142 | foreach my $file (@scmfiles) { 143 | my $furl = "$url/$config{'scmdir'}/$file"; 144 | getfile($furl,"$dir/$config{'scmdir'}/$file"); 145 | } 146 | } 147 | 148 | sub svnentries { 149 | my ($url, $dir) = @_; 150 | 151 | createsvndirs("$dir"); 152 | my $svnentries = "$dir/$config{'scmdir'}/entries"; 153 | # getfile("/$svnentries","$dir/$svnentries"); 154 | # my $file="$dir/$svnentries"; 155 | 156 | downloadsvnfiles($url,$dir); 157 | 158 | open(SVN,"<$svnentries") or warn ("cannot open entries file '$svnentries': $!\n"); 159 | my $prevline; 160 | while () { 161 | chomp; 162 | if ($_ eq "dir") { 163 | if (not $prevline eq '') { 164 | my $newdir=$prevline; 165 | if (not -e $newdir) { 166 | mkdir $newdir; 167 | svnentries("$url/$newdir","$dir/$newdir"); 168 | } 169 | } 170 | } 171 | 172 | if ($_ eq "file") { 173 | my $newfile=$prevline; 174 | getfile("$url/.svn/text-base/$newfile.svn-base","$dir/.svn/text-base/$newfile.svn-base"); 175 | } 176 | $prevline=$_; 177 | } 178 | close(SVN); 179 | } 180 | 181 | 182 | sub getobject { 183 | my ($gd,$ref) = @_; 184 | my $rdir = substr ($ref,0,2); # first two chars of sha1 is dirname 185 | my $rfile = $ref.".svn-base"; # whole sha1 is filename 186 | mkdir $gd."/pristine/$rdir"; 187 | getfile($config{'scmdir'}."/pristine/$rdir/$rfile",$gd."/pristine/$rdir/$rfile"); 188 | } 189 | 190 | sub getfile { 191 | my ($file,$outfile) = @_; 192 | my $furl = 
$config{'regurl'}."/".$file; 193 | my $req = HTTP::Request->new(GET => $furl); 194 | # Pass request to the user agent and get a response back 195 | my $res = $ua->request($req); 196 | if ($res->is_success) { 197 | print STDERR "[d] found $file\n" if ($config{'verbose'}>1);; 198 | open (out,">$outfile") or die ("cannot open file '$outfile': $!"); 199 | print out $res->content; 200 | close (out); 201 | } else { 202 | print STDERR "[!] Not found for $furl => $file: ".$res->status_line."\n" 203 | if ($config{'verbose'}>1); 204 | } 205 | return $res; 206 | } 207 | 208 | sub help { 209 | print "DVCS-Ripper: rip-svn.pl. Copyright (C) Kost. Distributed under GPL.\n\n"; 210 | print "Usage: $0 [options] -u [svnurl] \n"; 211 | print "\n"; 212 | print " -c perform 'checkout' on end (default)\n"; 213 | print " -b Use branch (default: $config{'branch'})\n"; 214 | print " -a Use agent (default: $config{'agent'})\n"; 215 | print " -s ignore SSL certification verification\n"; 216 | print " -v verbose (-vv will be more verbose)\n"; 217 | print "\n"; 218 | 219 | print "Example: $0 -v -u http://www.example.com/.svn/\n"; 220 | print "Example: $0 # with url and options in $configfile\n"; 221 | 222 | exit 0; 223 | } 224 | 225 | --------------------------------------------------------------------------------