├── .gitignore
├── LICENSE
├── README.md
├── hg-decode.pl
├── rip-bzr.pl
├── rip-cvs.pl
├── rip-git.pl
├── rip-hg.pl
└── rip-svn.pl
/.gitignore:
--------------------------------------------------------------------------------
1 | blib/
2 | .build/
3 | _build/
4 | cover_db/
5 | inc/
6 | Build
7 | Build.bat
8 | .last_cover_stats
9 | Makefile
10 | Makefile.old
11 | MANIFEST.bak
12 | META.yml
13 | MYMETA.yml
14 | nytprof.out
15 | pm_to_blib
16 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | GNU GENERAL PUBLIC LICENSE
2 | Version 2, June 1991
3 |
4 | Copyright (C) 1989, 1991 Free Software Foundation, Inc.,
5 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
6 | Everyone is permitted to copy and distribute verbatim copies
7 | of this license document, but changing it is not allowed.
8 |
9 | Preamble
10 |
11 | The licenses for most software are designed to take away your
12 | freedom to share and change it. By contrast, the GNU General Public
13 | License is intended to guarantee your freedom to share and change free
14 | software--to make sure the software is free for all its users. This
15 | General Public License applies to most of the Free Software
16 | Foundation's software and to any other program whose authors commit to
17 | using it. (Some other Free Software Foundation software is covered by
18 | the GNU Lesser General Public License instead.) You can apply it to
19 | your programs, too.
20 |
21 | When we speak of free software, we are referring to freedom, not
22 | price. Our General Public Licenses are designed to make sure that you
23 | have the freedom to distribute copies of free software (and charge for
24 | this service if you wish), that you receive source code or can get it
25 | if you want it, that you can change the software or use pieces of it
26 | in new free programs; and that you know you can do these things.
27 |
28 | To protect your rights, we need to make restrictions that forbid
29 | anyone to deny you these rights or to ask you to surrender the rights.
30 | These restrictions translate to certain responsibilities for you if you
31 | distribute copies of the software, or if you modify it.
32 |
33 | For example, if you distribute copies of such a program, whether
34 | gratis or for a fee, you must give the recipients all the rights that
35 | you have. You must make sure that they, too, receive or can get the
36 | source code. And you must show them these terms so they know their
37 | rights.
38 |
39 | We protect your rights with two steps: (1) copyright the software, and
40 | (2) offer you this license which gives you legal permission to copy,
41 | distribute and/or modify the software.
42 |
43 | Also, for each author's protection and ours, we want to make certain
44 | that everyone understands that there is no warranty for this free
45 | software. If the software is modified by someone else and passed on, we
46 | want its recipients to know that what they have is not the original, so
47 | that any problems introduced by others will not reflect on the original
48 | authors' reputations.
49 |
50 | Finally, any free program is threatened constantly by software
51 | patents. We wish to avoid the danger that redistributors of a free
52 | program will individually obtain patent licenses, in effect making the
53 | program proprietary. To prevent this, we have made it clear that any
54 | patent must be licensed for everyone's free use or not licensed at all.
55 |
56 | The precise terms and conditions for copying, distribution and
57 | modification follow.
58 |
59 | GNU GENERAL PUBLIC LICENSE
60 | TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
61 |
62 | 0. This License applies to any program or other work which contains
63 | a notice placed by the copyright holder saying it may be distributed
64 | under the terms of this General Public License. The "Program", below,
65 | refers to any such program or work, and a "work based on the Program"
66 | means either the Program or any derivative work under copyright law:
67 | that is to say, a work containing the Program or a portion of it,
68 | either verbatim or with modifications and/or translated into another
69 | language. (Hereinafter, translation is included without limitation in
70 | the term "modification".) Each licensee is addressed as "you".
71 |
72 | Activities other than copying, distribution and modification are not
73 | covered by this License; they are outside its scope. The act of
74 | running the Program is not restricted, and the output from the Program
75 | is covered only if its contents constitute a work based on the
76 | Program (independent of having been made by running the Program).
77 | Whether that is true depends on what the Program does.
78 |
79 | 1. You may copy and distribute verbatim copies of the Program's
80 | source code as you receive it, in any medium, provided that you
81 | conspicuously and appropriately publish on each copy an appropriate
82 | copyright notice and disclaimer of warranty; keep intact all the
83 | notices that refer to this License and to the absence of any warranty;
84 | and give any other recipients of the Program a copy of this License
85 | along with the Program.
86 |
87 | You may charge a fee for the physical act of transferring a copy, and
88 | you may at your option offer warranty protection in exchange for a fee.
89 |
90 | 2. You may modify your copy or copies of the Program or any portion
91 | of it, thus forming a work based on the Program, and copy and
92 | distribute such modifications or work under the terms of Section 1
93 | above, provided that you also meet all of these conditions:
94 |
95 | a) You must cause the modified files to carry prominent notices
96 | stating that you changed the files and the date of any change.
97 |
98 | b) You must cause any work that you distribute or publish, that in
99 | whole or in part contains or is derived from the Program or any
100 | part thereof, to be licensed as a whole at no charge to all third
101 | parties under the terms of this License.
102 |
103 | c) If the modified program normally reads commands interactively
104 | when run, you must cause it, when started running for such
105 | interactive use in the most ordinary way, to print or display an
106 | announcement including an appropriate copyright notice and a
107 | notice that there is no warranty (or else, saying that you provide
108 | a warranty) and that users may redistribute the program under
109 | these conditions, and telling the user how to view a copy of this
110 | License. (Exception: if the Program itself is interactive but
111 | does not normally print such an announcement, your work based on
112 | the Program is not required to print an announcement.)
113 |
114 | These requirements apply to the modified work as a whole. If
115 | identifiable sections of that work are not derived from the Program,
116 | and can be reasonably considered independent and separate works in
117 | themselves, then this License, and its terms, do not apply to those
118 | sections when you distribute them as separate works. But when you
119 | distribute the same sections as part of a whole which is a work based
120 | on the Program, the distribution of the whole must be on the terms of
121 | this License, whose permissions for other licensees extend to the
122 | entire whole, and thus to each and every part regardless of who wrote it.
123 |
124 | Thus, it is not the intent of this section to claim rights or contest
125 | your rights to work written entirely by you; rather, the intent is to
126 | exercise the right to control the distribution of derivative or
127 | collective works based on the Program.
128 |
129 | In addition, mere aggregation of another work not based on the Program
130 | with the Program (or with a work based on the Program) on a volume of
131 | a storage or distribution medium does not bring the other work under
132 | the scope of this License.
133 |
134 | 3. You may copy and distribute the Program (or a work based on it,
135 | under Section 2) in object code or executable form under the terms of
136 | Sections 1 and 2 above provided that you also do one of the following:
137 |
138 | a) Accompany it with the complete corresponding machine-readable
139 | source code, which must be distributed under the terms of Sections
140 | 1 and 2 above on a medium customarily used for software interchange; or,
141 |
142 | b) Accompany it with a written offer, valid for at least three
143 | years, to give any third party, for a charge no more than your
144 | cost of physically performing source distribution, a complete
145 | machine-readable copy of the corresponding source code, to be
146 | distributed under the terms of Sections 1 and 2 above on a medium
147 | customarily used for software interchange; or,
148 |
149 | c) Accompany it with the information you received as to the offer
150 | to distribute corresponding source code. (This alternative is
151 | allowed only for noncommercial distribution and only if you
152 | received the program in object code or executable form with such
153 | an offer, in accord with Subsection b above.)
154 |
155 | The source code for a work means the preferred form of the work for
156 | making modifications to it. For an executable work, complete source
157 | code means all the source code for all modules it contains, plus any
158 | associated interface definition files, plus the scripts used to
159 | control compilation and installation of the executable. However, as a
160 | special exception, the source code distributed need not include
161 | anything that is normally distributed (in either source or binary
162 | form) with the major components (compiler, kernel, and so on) of the
163 | operating system on which the executable runs, unless that component
164 | itself accompanies the executable.
165 |
166 | If distribution of executable or object code is made by offering
167 | access to copy from a designated place, then offering equivalent
168 | access to copy the source code from the same place counts as
169 | distribution of the source code, even though third parties are not
170 | compelled to copy the source along with the object code.
171 |
172 | 4. You may not copy, modify, sublicense, or distribute the Program
173 | except as expressly provided under this License. Any attempt
174 | otherwise to copy, modify, sublicense or distribute the Program is
175 | void, and will automatically terminate your rights under this License.
176 | However, parties who have received copies, or rights, from you under
177 | this License will not have their licenses terminated so long as such
178 | parties remain in full compliance.
179 |
180 | 5. You are not required to accept this License, since you have not
181 | signed it. However, nothing else grants you permission to modify or
182 | distribute the Program or its derivative works. These actions are
183 | prohibited by law if you do not accept this License. Therefore, by
184 | modifying or distributing the Program (or any work based on the
185 | Program), you indicate your acceptance of this License to do so, and
186 | all its terms and conditions for copying, distributing or modifying
187 | the Program or works based on it.
188 |
189 | 6. Each time you redistribute the Program (or any work based on the
190 | Program), the recipient automatically receives a license from the
191 | original licensor to copy, distribute or modify the Program subject to
192 | these terms and conditions. You may not impose any further
193 | restrictions on the recipients' exercise of the rights granted herein.
194 | You are not responsible for enforcing compliance by third parties to
195 | this License.
196 |
197 | 7. If, as a consequence of a court judgment or allegation of patent
198 | infringement or for any other reason (not limited to patent issues),
199 | conditions are imposed on you (whether by court order, agreement or
200 | otherwise) that contradict the conditions of this License, they do not
201 | excuse you from the conditions of this License. If you cannot
202 | distribute so as to satisfy simultaneously your obligations under this
203 | License and any other pertinent obligations, then as a consequence you
204 | may not distribute the Program at all. For example, if a patent
205 | license would not permit royalty-free redistribution of the Program by
206 | all those who receive copies directly or indirectly through you, then
207 | the only way you could satisfy both it and this License would be to
208 | refrain entirely from distribution of the Program.
209 |
210 | If any portion of this section is held invalid or unenforceable under
211 | any particular circumstance, the balance of the section is intended to
212 | apply and the section as a whole is intended to apply in other
213 | circumstances.
214 |
215 | It is not the purpose of this section to induce you to infringe any
216 | patents or other property right claims or to contest validity of any
217 | such claims; this section has the sole purpose of protecting the
218 | integrity of the free software distribution system, which is
219 | implemented by public license practices. Many people have made
220 | generous contributions to the wide range of software distributed
221 | through that system in reliance on consistent application of that
222 | system; it is up to the author/donor to decide if he or she is willing
223 | to distribute software through any other system and a licensee cannot
224 | impose that choice.
225 |
226 | This section is intended to make thoroughly clear what is believed to
227 | be a consequence of the rest of this License.
228 |
229 | 8. If the distribution and/or use of the Program is restricted in
230 | certain countries either by patents or by copyrighted interfaces, the
231 | original copyright holder who places the Program under this License
232 | may add an explicit geographical distribution limitation excluding
233 | those countries, so that distribution is permitted only in or among
234 | countries not thus excluded. In such case, this License incorporates
235 | the limitation as if written in the body of this License.
236 |
237 | 9. The Free Software Foundation may publish revised and/or new versions
238 | of the General Public License from time to time. Such new versions will
239 | be similar in spirit to the present version, but may differ in detail to
240 | address new problems or concerns.
241 |
242 | Each version is given a distinguishing version number. If the Program
243 | specifies a version number of this License which applies to it and "any
244 | later version", you have the option of following the terms and conditions
245 | either of that version or of any later version published by the Free
246 | Software Foundation. If the Program does not specify a version number of
247 | this License, you may choose any version ever published by the Free Software
248 | Foundation.
249 |
250 | 10. If you wish to incorporate parts of the Program into other free
251 | programs whose distribution conditions are different, write to the author
252 | to ask for permission. For software which is copyrighted by the Free
253 | Software Foundation, write to the Free Software Foundation; we sometimes
254 | make exceptions for this. Our decision will be guided by the two goals
255 | of preserving the free status of all derivatives of our free software and
256 | of promoting the sharing and reuse of software generally.
257 |
258 | NO WARRANTY
259 |
260 | 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
261 | FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
262 | OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
263 | PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
264 | OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
265 | MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
266 | TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
267 | PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
268 | REPAIR OR CORRECTION.
269 |
270 | 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
271 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
272 | REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
273 | INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
274 | OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
275 | TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
276 | YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
277 | PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
278 | POSSIBILITY OF SUCH DAMAGES.
279 |
280 | END OF TERMS AND CONDITIONS
281 |
282 | How to Apply These Terms to Your New Programs
283 |
284 | If you develop a new program, and you want it to be of the greatest
285 | possible use to the public, the best way to achieve this is to make it
286 | free software which everyone can redistribute and change under these terms.
287 |
288 | To do so, attach the following notices to the program. It is safest
289 | to attach them to the start of each source file to most effectively
290 | convey the exclusion of warranty; and each file should have at least
291 | the "copyright" line and a pointer to where the full notice is found.
292 |
293 | {description}
294 | Copyright (C) {year} {fullname}
295 |
296 | This program is free software; you can redistribute it and/or modify
297 | it under the terms of the GNU General Public License as published by
298 | the Free Software Foundation; either version 2 of the License, or
299 | (at your option) any later version.
300 |
301 | This program is distributed in the hope that it will be useful,
302 | but WITHOUT ANY WARRANTY; without even the implied warranty of
303 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
304 | GNU General Public License for more details.
305 |
306 | You should have received a copy of the GNU General Public License along
307 | with this program; if not, write to the Free Software Foundation, Inc.,
308 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
309 |
310 | Also add information on how to contact you by electronic and paper mail.
311 |
312 | If the program is interactive, make it output a short notice like this
313 | when it starts in an interactive mode:
314 |
315 | Gnomovision version 69, Copyright (C) year name of author
316 | Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
317 | This is free software, and you are welcome to redistribute it
318 | under certain conditions; type `show c' for details.
319 |
320 | The hypothetical commands `show w' and `show c' should show the appropriate
321 | parts of the General Public License. Of course, the commands you use may
322 | be called something other than `show w' and `show c'; they could even be
323 | mouse-clicks or menu items--whatever suits your program.
324 |
325 | You should also get your employer (if you work as a programmer) or your
326 | school, if any, to sign a "copyright disclaimer" for the program, if
327 | necessary. Here is a sample; alter the names:
328 |
329 | Yoyodyne, Inc., hereby disclaims all copyright interest in the program
330 | `Gnomovision' (which makes passes at compilers) written by James Hacker.
331 |
332 | {signature of Ty Coon}, 1 April 1989
333 | Ty Coon, President of Vice
334 |
335 | This General Public License does not permit incorporating your program into
336 | proprietary programs. If your program is a subroutine library, you may
337 | consider it more useful to permit linking proprietary applications with the
338 | library. If this is what you want to do, use the GNU Lesser General
339 | Public License instead of this License.
340 |
341 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | [](https://inventory.rawsec.ml/tools.html#dvcs-ripper)
2 | [](https://github.com/kost/dvcs-ripper/stargazers)
3 | [](https://github.com/kost/dvcs-ripper/blob/master/LICENSE)
4 |
5 | dvcs-ripper
6 | ===========
7 |
8 | Rip web accessible (distributed) version control systems: SVN, GIT, Mercurial/hg, bzr, ...
9 |
10 | It can rip repositories even when directory browsing is turned off.
11 |
12 | Make sure you are in an empty directory where you want the repositories to be downloaded/cloned.
13 |
14 | ## Requirements
15 |
16 | - Perl
17 | - Perl modules:
18 | - required: LWP, IO::Socket::SSL
19 | - for newer SVN: DBD::SQLite and DBI
20 | - for faster GIT: Parallel::ForkManager, Redis and Algorithm::Combinatorics
21 | - (D)VCS client of what you want to rip (cvs, svn, git, hg, bzr, ...)
22 |
23 | ### Requirements on Debian/Ubuntu
24 |
25 | You can easily install perl requirements:
26 |
27 | `sudo apt-get install perl libio-socket-ssl-perl libdbd-sqlite3-perl libclass-dbi-perl libio-all-lwp-perl`
28 |
29 | Optional requirements (faster git rip):
30 | `sudo apt-get install libparallel-forkmanager-perl libredis-perl libalgorithm-combinatorics-perl`
31 |
32 | And if you need all clients supported:
33 |
34 | `sudo apt-get install cvs subversion git bzr mercurial`
35 |
36 | ## Docker
37 |
38 | If you just want the Docker version, it is here:
39 |
40 | https://github.com/kost/docker-webscan/tree/master/alpine-dvcs-ripper
41 |
42 | Just say something like:
43 |
44 | `docker run --rm -it -v /path/to/host/work:/work:rw k0st/alpine-dvcs-ripper rip-git.pl -v -u http://www.example.org/.git`
45 |
46 |
47 | GIT
48 | ===========
49 | Example run (for git):
50 |
51 | `rip-git.pl -v -u http://www.example.com/.git/`
52 |
53 | It will automatically do `git checkout -f`
54 |
55 | or if you would like to ignore SSL certificate verification (with -s):
56 |
57 | `rip-git.pl -s -v -u http://www.example.com/.git/`
58 |
59 | Mercurial/HG
60 | ===========
61 | Example run (for hg):
62 |
63 | `rip-hg.pl -v -u http://www.example.com/.hg/`
64 |
65 | It will automatically do `hg revert `
66 |
67 | or if you would like to ignore SSL certificate verification (with -s):
68 |
69 | `rip-hg.pl -s -v -u http://www.example.com/.hg/`
70 |
71 | Bazaar/bzr
72 | ===========
73 | Example run (for bzr):
74 |
75 | `rip-bzr.pl -v -u http://www.example.com/.bzr/`
76 |
77 | It will automatically do `bzr revert`
78 |
79 | or if you would like to ignore SSL certificate verification (with -s):
80 |
81 | `rip-bzr.pl -s -v -u http://www.example.com/.bzr/`
82 |
83 |
84 | SVN
85 | ===========
86 | It supports both the OLDER and NEWER svn client formats. The older format keeps .svn files in every directory, while
87 | the newer format has a single .svn directory with wc.db inside it. The script automatically detects which
88 | format is used on the target.
89 |
90 | Example run (for SVN):
91 |
92 | `rip-svn.pl -v -u http://www.example.com/.svn/`
93 |
94 | It will automatically do `svn revert -R .`
95 |
96 | CVS
97 | ===========
98 | Example run (for CVS):
99 |
100 | `rip-cvs.pl -v -u http://www.example.com/CVS/`
101 |
102 | This will not rip CVS, but it will display useful info.
103 |
104 | ## Advanced usage examples
105 |
106 | Some examples of how it can be used:
107 |
108 | ### Output handling
109 |
110 | Download git tree to specific output dir:
111 |
112 | `rip-git.pl -o /my/previously/made/dir -v -u http://www.example.com/.git/`
113 |
114 | Download git tree to specific output dir (creating dir `http__www.example.com_.git_` for url):
115 |
116 | `rip-git.pl -m -o /dir -v -u http://www.example.com/.git/`
117 |
118 | ### Redis usage with docker
119 |
120 | Create Redis docker container:
121 |
122 | `docker run --rm --name myredis -it -v /my/host/dir/data:/data:rw k0st/alpine-redis`
123 |
124 | In another terminal, just link redis container and say something like this:
125 |
126 | `docker run --rm --link=myredis:redis -it -v /path/to/host/work:/work:rw k0st/alpine-dvcs-ripper rip-git.pl -e docker -v -u http://www.example.org/.git -m -o /work`
127 |
128 | ### Using redis for resuming work of ripping
129 |
130 | Create Redis docker container:
131 |
132 | `docker run --name redisdvcs -it -v /my/host/dir/data:/data:rw k0st/alpine-redis`
133 |
134 | In another terminal, just link redis container and say something like this:
135 |
136 | `docker run --link=redisdvcs:redis -it -v /path/to/host/work:/work:rw k0st/alpine-dvcs-ripper rip-git.pl -n -e docker -v -u http://www.example.org/.git -m -o /work`
137 |
138 | ### Abusing redis for massive parallel tasks
139 |
140 | Create global NFS and mount /work on each client. Create global Redis docker container:
141 |
142 | `docker run --name redisdvcs -it -v /my/host/dir/data:/data:rw k0st/alpine-redis`
143 |
144 | In another terminal, just link redis container and say something like this on 1st client
145 |
146 | `docker run -it -v /path/to/host/work:/work:rw k0st/alpine-dvcs-ripper rip-git.pl -n -e global.docker.ip -v -u http://www.example.org/.git -t 10 -c -m -o /work`
147 |
148 | In another terminal, just link redis container and say something like this on 2nd client:
149 |
150 | `docker run -it -v /path/to/host/work:/work:rw k0st/alpine-dvcs-ripper rip-git.pl -n -e global.docker.ip -v -u http://www.example.org/.git -t 10 -c -m -o /work`
151 |
152 | and so on...
153 |
154 | You need to perform `git checkout -f` yourself at the end - of course!
155 |
156 | ## Future
157 |
158 | Feel free to implement something and send a pull request. Feel free to suggest any feature. A lot of features
159 | were actually implemented by request.
160 |
161 | ### ToDo
162 | - [ ] Recognize 404 pages which return 200 in SVN/CVS
163 | - [ ] Try to repeat each trick after previous trick was successful
164 | - [ ] Progress bars
165 |
166 | ### Done
167 | - [x] Support for brute forcing pack names
168 | - [x] Intelligent guessing of packed refs
169 | - [x] Support for objects/info/packs from https://www.kernel.org/pub/software/scm/git/docs/gitrepository-layout.html
170 | - [x] Recognize 404 pages which return 200
171 | - [x] Introduce ignore SSL/TLS verification in SVN/CVS
172 | - [x] Bzr support
173 |
174 |
--------------------------------------------------------------------------------
/hg-decode.pl:
--------------------------------------------------------------------------------
1 | #!/usr/bin/perl
2 |
3 | use IO::Uncompress::Inflate qw(inflate $InflateError);
4 | use File::Path qw(make_path);
5 | use LWP::UserAgent;
6 | use File::Temp qw(tempfile tempdir);
7 |
use strict;
use warnings;

# Usage: hg-decode.pl <host>
#
# Fetches http://<host>/.hg/dirstate, reads the list of tracked file names
# out of it, then downloads each file's revlog from .hg/store/data/ (the
# ".i" index and, when the server has one, the separate ".d" data file) and
# writes the decoded revision chunks, in index order, to output/<file name>.

my $AGENT             = "All Your Files Are Belong To Us/1.0";
my $NODE_LEN          = 20;    # each of the two parent ids in the dirstate header
my $DIRSTATE_HEAD_LEN = 17;    # fixed-size part of a dirstate entry ("CNNNN")
my $REVLOG_ENTRY_LEN  = 64;    # 6-byte offset followed by 58 bytes of fields

# Parse the raw bytes of a dirstate file.
# Returns a list of hashrefs with the keys status, mode, size, mtime, length
# and name.  Dies when the 40-byte parent header is missing; a truncated
# trailing entry only produces a warning and ends the list.
sub parse_dirstate {
    my ($content) = @_;
    $content = '' unless defined $content;

    # An in-memory handle keeps the read() based parsing without temp files.
    open(my $in, '<', \$content) or die "Cannot read dirstate data: $!\n";
    binmode($in);

    my $buf;
    for my $parent (1, 2) {
        my $got = read($in, $buf, $NODE_LEN);
        die "Truncated Mercurial dirstate (parent $parent missing)\n"
            unless defined $got && $got == $NODE_LEN;
    }

    my @entries;
    while (1) {
        my $got = read($in, $buf, $DIRSTATE_HEAD_LEN);
        last unless $got;    # clean end of data
        if ($got != $DIRSTATE_HEAD_LEN) {
            warn "Truncated dirstate entry header, stopping\n";
            last;
        }

        my %entry;
        @entry{qw(status mode size mtime length)} = unpack("CNNNN", $buf);

        $got = read($in, $buf, $entry{length});
        unless (defined $got && $got == $entry{length}) {
            warn "Truncated dirstate entry name, stopping\n";
            last;
        }

        # For copied/renamed files the name is followed by NUL and the copy
        # source; only the part before the NUL is the tracked file name.
        ($entry{name} = $buf) =~ s/\0.*//s;

        push @entries, \%entry if length $entry{name};
    }
    close($in);

    return @entries;
}

# Apply the store file name encoding used by this script: "_" is doubled,
# upper-case letters become "_" plus the lower-case letter, and every
# character outside the printable ASCII range " ".."~" becomes "~" followed
# by its two-digit hex code.
sub mangle_store_name {
    my ($name) = @_;
    $name =~ s/_/__/g;
    $name =~ s/([A-Z])/_\l$1/g;
    $name =~ s/([^ -~])/'~' . unpack('H2', $1)/ge;
    return $name;
}

# True when $name is a relative path without ".." components, i.e. writing
# "output/$name" cannot escape the output directory.  The names come from the
# remote server and must not be trusted.
sub is_safe_relative_path {
    my ($name) = @_;
    return 0 if !defined $name || $name eq '' || $name =~ m{\A/};
    return 0 if grep { $_ eq '..' } split m{/}, $name;
    return 1;
}

# Decode one stored revlog chunk.  The first byte selects the encoding:
# NUL = stored as-is, "u" = uncompressed payload after the marker byte,
# anything else is handed to zlib inflate.  Returns '' when inflating fails.
sub decode_chunk {
    my ($chunk) = @_;
    return '' unless defined $chunk && length $chunk;

    my $type = substr($chunk, 0, 1);
    return $chunk if $type eq "\0";
    return substr($chunk, 1) if $type eq 'u';

    my $plain;
    unless (inflate(\$chunk => \$plain)) {
        warn "Could not inflate chunk: $InflateError\n";
        return '';
    }
    return defined $plain ? $plain : '';
}

# Download the revlog of one tracked file and write its decoded chunks to
# output/<name>.  Returns 1 when the output file was written, 0 when the
# entry was skipped (unsafe name, missing index, unwritable output).
sub extract_file {
    my ($ua, $server, $name) = @_;

    print "Extracting " . $name . "\n";

    unless (is_safe_relative_path($name)) {
        warn "Skipping unsafe file name '$name'\n";
        return 0;
    }

    my $store_url = "http://$server/.hg/store/data/" . mangle_store_name($name);

    my $index = $ua->get($store_url . ".i");
    unless ($index->code == 200) {
        warn "Could not fetch index for $name: " . $index->status_line . "\n";
        return 0;
    }
    my $index_bytes = $index->content;
    open(my $index_fh, '<', \$index_bytes)
        or die "Cannot read index data for $name: $!\n";
    binmode($index_fh);

    # Chunks live in the ".d" file when the server has one; otherwise they
    # are read from the index itself, right after each 64-byte entry.
    my $data_fh;
    my $data_bytes;
    my $data = $ua->get($store_url . ".d");
    if ($data->code == 200) {
        $data_bytes = $data->content;
        open($data_fh, '<', \$data_bytes)
            or die "Cannot read revlog data for $name: $!\n";
        binmode($data_fh);
    }

    # Make sure the path is there for the output
    my $outfile = "output/" . $name;
    (my $outdir = $outfile) =~ s{/[^/]*\z}{};
    make_path($outdir);

    my $out;
    unless (open($out, '>', $outfile)) {
        warn "Cannot write $outfile: $!\n";
        return 0;
    }
    binmode($out);

    my $buf;
    while (1) {
        my $got = read($index_fh, $buf, $REVLOG_ENTRY_LEN);
        last unless $got;    # clean end of index
        if ($got != $REVLOG_ENTRY_LEN) {
            warn "Truncated index entry in $name, stopping\n";
            last;
        }

        # All fields are big-endian.  The 48-bit offset is split into two
        # high bytes and a 32-bit low word.
        my %head;
        my ($msb, $nmsb, $low);
        ( $msb, $nmsb, $low,
          @head{qw(flags clength ulength base link p1 p2 nodeid)} )
            = unpack("CCNnNNNNNNH*", $buf);
        $head{offset} = $low + ($nmsb << 32) + ($msb << 40);

        next unless $head{clength} > 0;

        my $source = $data_fh ? $data_fh : $index_fh;
        my $chunk;
        $got = read($source, $chunk, $head{clength});
        unless (defined $got && $got == $head{clength}) {
            warn "Truncated revision data in $name, stopping\n";
            last;
        }

        print {$out} decode_chunk($chunk);
    }

    close($index_fh);
    close($data_fh) if $data_fh;
    close($out) or warn "Error closing $outfile: $!\n";

    return 1;
}

# --- main -------------------------------------------------------------------

my $server = $ARGV[0];
die "Usage: $0 <host>\n" unless defined $server && length $server;

my $ua = LWP::UserAgent->new;
$ua->agent($AGENT);

# First grab the database file
my $dirstate = $ua->get("http://$server/.hg/dirstate");
if ($dirstate->code != 200)
{
    die "Could not find Mercurial database";
}

# Now extract the files
foreach my $entry (parse_dirstate($dirstate->content))
{
    extract_file($ua, $server, $entry->{'name'});
}
--------------------------------------------------------------------------------
/rip-bzr.pl:
--------------------------------------------------------------------------------
1 | #!/usr/bin/perl
2 |
3 | use strict;
4 |
5 | use IO::Socket::SSL;
6 | use LWP;
7 | use LWP::UserAgent;
8 | use HTTP::Request;
9 | use HTTP::Response;
10 | use Getopt::Long;
11 | use Cwd;
12 |
# Optional per-user settings file, looked up in the user's home directory.
my $configfile = $ENV{HOME} . "/.rip-bzr";

# Built-in defaults; they can be overridden later by the config file and by
# command-line options.
my %config = (
    # Name of the local directory the downloaded bzr metadata is stored in.
    'bzrdir'   => ".bzr",
    # User-Agent header sent with every request.
    'agent'    => 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.7; rv:10.0.2) Gecko/20100101 Firefox/10.0.2',
    # Verbosity level; progress messages are printed when it is above 0.
    'verbose'  => 0,
    # Checkout flag (on by default) -- its consumer is outside this section.
    'checkout' => 1,

    # Number of random-path probes used to detect servers answering 200
    # for missing pages.
    'respdetectmax'  => 3,
    # NOTE(review): presumably a size limit used when comparing fake-404
    # responses -- the code using it is not in this section; confirm.
    'resp404size'    => 256,
    # Length of the random path requested by each probe.
    'resp404reqsize' => 32,
);
23 |
# Return a string of $num random ASCII letters (A-Z, a-z).
# The script requests such random names to probe how the server answers for
# paths that do not exist.
# A length of 0 (or an undefined length) yields the empty string rather
# than undef, so callers can always concatenate the result.
sub randomstr {
    my ($num) = @_;
    $num = 0 unless defined $num;
    my @chars = ("A".."Z", "a".."z");
    return join('', map { $chars[rand @chars] } 1 .. $num);
}
31 |
# Load optional per-user settings from $configfile.
# Format: one "name = value" pair per line; everything after "#" is a
# comment, and leading/trailing whitespace and blank lines are ignored.
# Values read here replace the built-in defaults in %config.
# An unreadable file is reported and skipped instead of aborting the run.
if (-e $configfile) {
    if (open(my $config_fh, '<', $configfile)) {
        while (my $line = <$config_fh>) {
            chomp $line;                 # no newline
            $line =~ s/#.*//;            # no comments
            $line =~ s/^\s+//;           # no leading white
            $line =~ s/\s+$//;           # no trailing white
            next unless length $line;    # anything left?
            my ($var, $value) = split(/\s*=\s*/, $line, 2);
            $config{$var} = $value;
        }
        close($config_fh);
    }
    else {
        print STDERR "[!] Cannot read config file $configfile: $!\n";
    }
}
45 |
# Allow single-letter switches to be bundled (e.g. -vv).
Getopt::Long::Configure("bundling");

# Command-line switches are stored straight into %config; -h calls help().
my $result = GetOptions(
    'a|agent=s'    => \$config{agent},
    'b|branch=s'   => \$config{branch},
    'u|url=s'      => \$config{url},
    'p|proxy=s'    => \$config{proxy},
    'c|checkout!'  => \$config{checkout},
    's|sslignore!' => \$config{sslignore},
    'v|verbose+'   => \$config{verbose},
    'h|help'       => \&help,
);

# Files requested unconditionally, relative to the remote URL.
my @knownfiles = qw(
    branch-format
    branch/branch.conf
    branch/format
    branch/last-revision
    branch/tags
    checkout/conflicts
    checkout/dirstate
    checkout/format
    checkout/merge-hashes
    checkout/views
    repository/format
    repository/pack-names
);

my $ua = LWP::UserAgent->new;
$ua->agent($config{agent});

# Optionally switch off certificate and host-name verification.
$ua->ssl_opts(
    SSL_verify_mode => IO::Socket::SSL::SSL_VERIFY_NONE,
    verify_hostname => 0,
) if $config{sslignore};

# for socks proxy make sure you have LWP::Protocol::socks
$ua->proxy([ 'http', 'https' ], $config{proxy}) if $config{proxy};

# Local control directory (with trailing slash) and its skeleton.
my $ddir = "$config{bzrdir}/";

foreach my $subdir ('', 'branch', 'checkout', 'repository',
                    'repository/indices', 'repository/packs') {
    mkdir $ddir . $subdir;
}
94 |
print STDERR "[i] Downloading bzr files from $config{'url'}\n" if ($config{'verbose'}>0);

# Soft-404 detection: request a few random (non-existent) file names. If any
# of them fails, the server reports missing files properly. If all succeed,
# the server answers 200 for missing files, and the first
# $config{'resp404size'} bytes of that page are remembered so getfile() can
# recognise it.
my @resp404;
my $respdetectmax=$config{'respdetectmax'};
print STDERR "[i] Auto-detecting 404 as 200 with $config{'respdetectmax'} requests\n" if ($config{'verbose'}>0);
$config{'resp404correct'}=0;
for my $probe (1 .. $respdetectmax) {
    my $resp = getreq(randomstr($config{'resp404reqsize'}));
    unless ($resp->is_success) {
        $config{'resp404correct'} = 1;
        last;    # exit loop
    }
    push @resp404, $resp;
}

# With respdetectmax < 1 no probe is sent, so there is no sample page to
# compare against; fall back to trusting the status codes.
if (!$config{'resp404correct'} and !@resp404) {
    print STDERR "[!] No 404 probes were made, assuming correct 404 responses\n";
    $config{'resp404correct'} = 1;
}

if ($config{'resp404correct'}) {
    print STDERR "[i] Getting correct 404 responses\n";
} else {
    print STDERR "[i] Getting 200 as 404 responses. Adapting...\n";
    my $size = $config{'resp404size'};
    # The first probe's prefix is the reference "not found" page.
    my $reference = substr($resp404[0]->content, 0, $size);
    foreach my $entry (@resp404) {
        next if substr($entry->content, 0, $size) eq $reference;
        print STDERR "[i] 404 responses are different, you will have to customize script source code\n";
        last;    # exit loop
    }
    $config{'resp404content'} = $reference;
}
128 |
# Fetch the files every bzr control directory is expected to have.
foreach my $file (@knownfiles) {
    getfile($file,$ddir.$file);
}

# Repeatedly run "bzr check", download every file it reports in an ERROR
# line, and stop once a pass reports nothing or fetches nothing new.
my $tofetch=0;
my $fetched=0;
my $pcount=1;
my $fcount=0;
# Absolute prefix of the local control directory, stripped from reported paths.
my $curdir = getcwd() . "/" . $config{'bzrdir'} . "/";
while ($pcount>0) {
    print STDERR "[i] Running bzr check to check for missing items\n" if ($config{'verbose'}>0);
    open(my $pipe, "bzr check 2>&1 |") or die "cannot find bzr: $!";
    $pcount=0;
    $fcount=0;
    while (my $line = <$pipe>) {
        print $line if ($config{'verbose'}>9);
        chomp $line;
        next unless $line =~ /ERROR:/;
        $tofetch++;
        $pcount++;
        # The missing path is the first single-quoted string on the line.
        my ($missingfile) = $line =~ m/'(.*?)'/;
        next unless defined $missingfile;
        # Only strip the prefix when it is really there: substr() with an
        # index of -1 would cut off the last character instead.
        my $pos = index($missingfile, $curdir);
        substr($missingfile, $pos, length $curdir, '') if $pos >= 0;
        print STDERR "[i] Getting $missingfile\n" if ($config{'verbose'}>0);
        my $res=getfile($missingfile,$ddir.$missingfile);
        if ($res->is_success) {
            $fcount++;
            $fetched++;
        }
    }
    close($pipe);
    print STDERR "[i] Got items with bzr check: $pcount\n" if ($config{'verbose'}>0);
    print STDERR "[i] Items fetched: $fcount\n" if ($config{'verbose'}>0);
    if ($fcount == 0) {
        last;
    }
}

print STDERR "[i] Finished fetching ($fetched/$tofetch)\n";

if ($config{'checkout'}) {
    print STDERR "[i] Checking out/Reverting source by calling bzr revert\n";
    system("bzr revert");
}
175 |
176 | # -- END
177 |
# Issue a GET for $file below the configured base URL and return the
# HTTP::Response as received (no soft-404 filtering, nothing is saved).
sub getreq {
    my ($file) = @_;
    my $target = join '/', $config{'url'}, $file;
    return $ua->request(HTTP::Request->new(GET => $target));
}
186 |
# Download $file (relative to the base URL) and store its body in $outfile.
# Returns an HTTP::Response: the real one on success or on an HTTP error, or
# a synthetic 404 when the server answered 200 with its "not found" page.
# Without that, callers testing is_success would count the soft-404 as a
# fetched file and the "bzr check" loop would never terminate.
# Dies if $outfile cannot be written.
sub getfile {
    my ($file,$outfile) = @_;
    my $res = getreq($file);
    unless ($res->is_success) {
        print STDERR "[!] Not found for $file: ".$res->status_line."\n"
            if ($config{'verbose'}>0);
        return $res;
    }
    if (not $config{'resp404correct'}) {
        print STDERR "[d] got 200 for $file, but checking content\n" if ($config{'verbose'}>1);
        my $chopresp=substr($res->content,0,$config{'resp404size'});
        if ($chopresp eq $config{'resp404content'}) {
            print STDERR "[!] Not found for $file: 404 as 200\n"
                if ($config{'verbose'}>0);
            return HTTP::Response->new(404);
        }
    }
    print STDERR "[d] found $file\n" if ($config{'verbose'}>0);
    open(my $out, '>', $outfile) or die ("cannot open file $outfile: $!");
    binmode($out);    # repository files are binary
    print {$out} $res->content;
    close($out) or die ("cannot write file $outfile: $!");
    return $res;
}
213 |
# Print usage information to STDOUT and exit with status 0.
# Installed as the -h/--help handler in GetOptions.
sub help {
    print "DVCS-Ripper: rip-bzr.pl. Copyright (C) Kost. Distributed under GPL.\n\n";
    print "Usage: $0 [options] -u [bzrurl] \n";
    print "\n";
    print " -c perform 'bzr revert' on end (default)\n";
    print " -b Use branch (default: $config{'branch'})\n";
    print " -a Use agent (default: $config{'agent'})\n";
    print " -s do not verify SSL cert\n";
    print " -p use proxy for connections\n";
    print " -v verbose (-vv will be more verbose)\n";
    print "\n";
    print "Example: $0 -v -u http://www.example.com/.bzr/\n";
    print "Example: $0 # with url and options in $configfile\n";
    # -u takes the next word as its value, so the URL must directly follow it.
    print "Example: $0 -v -p socks://localhost:1080 -u http://www.example.com/.bzr/\n";
    print "For socks like proxy, make sure you have LWP::Protocol::socks\n";

    exit 0;
}
232 |
233 |
--------------------------------------------------------------------------------
/rip-cvs.pl:
--------------------------------------------------------------------------------
1 | #!/usr/bin/perl
2 |
3 | use strict;
4 |
5 | use IO::Socket::SSL;
6 | use LWP;
7 | use LWP::UserAgent;
8 | use HTTP::Request;
9 | use Getopt::Long;
10 |
# Per-user settings file; its key = value lines are merged over the defaults.
my $configfile = $ENV{HOME} . '/.rip-cvs';

# Built-in defaults.
my %config = (
    'branch'   => 'HEAD',
    'scmdir'   => 'CVS',
    'agent'    => 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.7; rv:10.0.2) Gecko/20100101 Firefox/10.0.2',
    'verbose'  => 0,
    'checkout' => 1,
    'outdir'   => './',
    'rlevel'   => 9,    # processcvs stops descending once its level exceeds this
);
20 |
# Merge optional per-user settings from $configfile into %config.
# Format: one "key = value" pair per line; "#" starts a comment and blank
# lines are ignored.
if (-e $configfile) {
    # An unreadable file is skipped with a warning. ("next" cannot be used
    # here: this "if" is not a loop block, so it would abort the script.)
    if (open(my $cfgfh, '<', $configfile)) {
        while (my $line = <$cfgfh>) {
            chomp $line;                 # no newline
            $line =~ s/#.*//;            # no comments
            $line =~ s/^\s+//;           # no leading white
            $line =~ s/\s+$//;           # no trailing white
            next unless length $line;    # anything left?
            my ($var, $value) = split(/\s*=\s*/, $line, 2);
            $config{$var} = $value;
        }
        close($cfgfh);
    } else {
        warn "[!] cannot open $configfile: $!\n";
    }
}
34 |
# Allow single-letter switches to be bundled (e.g. -vv).
Getopt::Long::Configure("bundling");

# Command-line switches are stored straight into %config; -h calls help().
my $result = GetOptions(
    'a|agent=s'    => \$config{agent},
    'b|branch=s'   => \$config{branch},
    'u|url=s'      => \$config{url},
    'c|checkout!'  => \$config{checkout},
    's|sslignore!' => \$config{sslignore},
    'v|verbose+'   => \$config{verbose},
    'h|help'       => \&help,
);

# Metadata files requested from every CVS directory.
my @scmfiles = qw(Repository Root Entries);

# Dump the effective configuration at high verbosity.
if ($config{verbose} > 3) {
    print "[c] $_ => $config{$_}\n" for keys %config;
}

my $ua = LWP::UserAgent->new;
$ua->agent($config{agent});

# Optionally switch off certificate and host-name verification.
$ua->ssl_opts(
    SSL_verify_mode => IO::Socket::SSL::SSL_VERIFY_NONE,
    verify_hostname => 0,
) if $config{sslignore};

# normalize URL: 'scmurl' ends up containing "/CVS", 'regurl' is the URL with
# the first "/CVS" removed (or the URL as given when it has none).
$config{regurl} = $config{url};
if ($config{url} =~ m{/CVS}) {
    $config{scmurl} = $config{url};
    $config{regurl} =~ s{/CVS}{};
} else {
    $config{scmurl} = $config{url} . "/CVS";
}

# Start the walk at the remote root, writing into the output directory.
processcvs('', $config{outdir}, 0);
78 |
# Mirror the CVS metadata of one remote directory and recurse into the
# sub-directories listed in its Entries file.
#   $url   - directory path relative to $config{'regurl'} ('' for the root)
#   $dir   - local directory receiving the files
#   $level - recursion depth, 0 for the root; once it exceeds
#            $config{'rlevel'} the metadata is still downloaded but nothing
#            is listed or descended into
# Each Entries record is printed to STDOUT indented by $level; at level 0 the
# Root and Repository contents and a summary line are printed as well.
sub processcvs {
    my ($url,$dir,$level) = @_;
    createcvsdirs ($dir);
    downloadcvsfiles ($url,$dir);

    return if ($level>$config{'rlevel'});

    my $cntfile = 0;
    my $ident=" "x$level;
    my $scm = "$dir/$config{'scmdir'}";

    if ($level==0 and -e "$scm/Root") {
        $cntfile++;
        print "$ident"."[i] CVSROOT=";
        displayfile("$scm/Root");
    }

    if ($level==0 and -e "$scm/Repository") {
        $cntfile++;
        print "$ident"."[i] cvs checkout ";
        displayfile("$scm/Repository");
    }

    if (-e "$scm/Entries") {
        $cntfile++;
        my $cont = readfile("$scm/Entries");
        $cont = '' unless defined $cont;
        # A named loop variable keeps the line safe from $_ being changed by
        # the recursive call below.
        foreach my $entry (split /\n/, $cont) {
            next unless $entry =~ m{/};
            my @rec = split(m{/}, $entry);
            printf "%s%1s %-38s %-14s %22s\n", $ident, $rec[0], $rec[1], $rec[2], $rec[3];
            next unless $rec[0] eq 'D';
            my $name = $rec[1];
            # The name comes from the remote server: never follow an empty
            # name, '.' or '..', which would leave the output tree.
            if (!defined $name or $name eq '' or $name eq '.' or $name eq '..') {
                print STDERR "$ident"."[!] Skipping suspicious directory entry\n";
                next;
            }
            mkdir "$dir/$name";
            processcvs("$url/$name","$dir/$name",$level+1);
        }
    }

    if ($level==0) {
        if ($cntfile > 0) {
            print STDERR "$ident"."[i] CVS identified on $config{'url'} by $cntfile guesses\n";
        } else {
            print STDERR "$ident"."[i] CVS not identified, check URL: $config{'url'}\n";
        }
    }

}
128 |
129 |
# Copy the contents of $file to STDOUT.
# Warns and returns without printing anything if the file cannot be opened.
sub displayfile {
    my ($file) = @_;
    open(my $fh, '<', $file) or do {
        warn ("cannot open $file: $!");
        return;
    };
    while (my $line = <$fh>) {
        print $line;
    }
    close($fh);
}
138 |
# Return the whole contents of $file as one string ('' for an empty file).
# Warns and returns undef if the file cannot be opened.
sub readfile {
    my ($file) = @_;
    open(my $fh, '<', $file) or do {
        warn ("cannot open $file: $!");
        return undef;
    };
    my $str = '';
    while (my $line = <$fh>) {
        $str .= $line;
    }
    close($fh);
    return ($str);
}
150 |
# Create the CVS metadata directory inside $dir.
# Uses $config{'scmdir'} (default "CVS"), the same name the download and
# parsing code builds its paths from, so overriding it keeps them in step.
sub createcvsdirs {
    my ($dir) = @_;
    mkdir $dir."/".$config{'scmdir'};
}
155 |
# Fetch every file named in @scmfiles from the metadata directory of the
# remote directory $url into the matching directory below $dir.
sub downloadcvsfiles {
    my ($url,$dir) = @_;
    my $scm = $config{'scmdir'};
    foreach my $name (@scmfiles) {
        my $remote = join '/', $url, $scm, $name;
        my $local  = join '/', $dir, $scm, $name;
        getfile($remote, $local);
    }
}
163 |
# Download $file (relative to $config{'regurl'}) and store its body in
# $outfile. Returns the HTTP::Response; nothing is written when the request
# failed. Dies if $outfile cannot be written.
sub getfile {
    my ($file,$outfile) = @_;
    my $furl = $config{'regurl'}."/".$file;
    my $req = HTTP::Request->new(GET => $furl);
    # Pass request to the user agent and get a response back
    my $res = $ua->request($req);
    if ($res->is_success) {
        print STDERR "[d] found $file\n" if ($config{'verbose'}>1);
        open(my $out, '>', $outfile) or die ("cannot open file '$outfile': $!");
        binmode($out);    # store the body byte-for-byte
        print {$out} $res->content;
        close($out) or die ("cannot write file '$outfile': $!");
    } else {
        print STDERR "[!] Not found for $furl => $file: ".$res->status_line."\n"
            if ($config{'verbose'}>1);
    }
    return $res;
}
181 |
# Print usage information to STDOUT and exit with status 0.
# Installed as the -h/--help handler in GetOptions.
sub help {
    my @usage = (
        "DVCS-Ripper: rip-cvs.pl. Copyright (C) Kost. Distributed under GPL.\n\n",
        "Usage: $0 [options] -u [url] \n",
        "\n",
        " -c perform 'checkout' on end (default)\n",
        " -b Use branch (default: $config{'branch'})\n",
        " -a Use agent (default: $config{'agent'})\n",
        " -s ignore SSL certification verification\n",
        " -v verbose (-vv will be more verbose)\n",
        "\n",
        "Example: $0 -v -u http://www.example.com/CVS/\n",
        "Example: $0 # with url and options in $configfile\n",
    );
    print $_ foreach @usage;

    exit 0;
}
198 |
199 |
--------------------------------------------------------------------------------
/rip-git.pl:
--------------------------------------------------------------------------------
1 | #!/usr/bin/perl
2 |
3 | use strict;
4 |
5 | use Cwd;
6 | use IPC::SysV qw(IPC_PRIVATE S_IRWXU IPC_CREAT SEM_UNDO ftok);
7 | use IPC::Semaphore;
8 | use IPC::SharedMem;
9 |
10 | use IO::Socket::SSL;
11 | use LWP;
12 | use LWP::UserAgent;
13 | use HTTP::Request;
14 | use HTTP::Response;
15 | use Getopt::Long;
16 |
17 | use Digest::SHA qw(sha1 sha1_hex);
18 |
# Per-user settings file; its key = value lines are merged over the defaults.
my $configfile = $ENV{HOME} . '/.rip-git';

# Built-in defaults.
my %config = (
    'branch'   => 'master',
    'gitdir'   => '.git',
    'agent'    => 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.7; rv:10.0.2) Gecko/20100101 Firefox/10.0.2',
    'verbose'  => 0,
    'checkout' => 1,

    # Maximum number of HTTP redirects to follow.
    'redirects' => 0,

    # Soft-404 detection: number of probe requests, length of the response
    # prefix that is compared, and length of the random probe file name.
    'respdetectmax'  => 3,
    'resp404size'    => 256,
    'resp404reqsize' => 32,

    'gitpackbasename' => 'pack',
);
34 |
# Build a random string of $count ASCII letters (upper and lower case).
# Returns undef when $count is below 1, because nothing is ever appended.
sub randomstr {
    my ($count) = @_;
    my @pool = ('A' .. 'Z', 'a' .. 'z');
    my $result;
    foreach my $n (1 .. $count) {
        $result = (defined $result ? $result : '') . $pool[rand @pool];
    }
    return $result;
}
42 |
# Merge optional per-user settings from $configfile into %config.
# Format: one "key = value" pair per line; "#" starts a comment and blank
# lines are ignored.
if (-e $configfile) {
    # An unreadable file is skipped with a warning. ("next" cannot be used
    # here: this "if" is not a loop block, so it would abort the script.)
    if (open(my $cfgfh, '<', $configfile)) {
        while (my $line = <$cfgfh>) {
            chomp $line;                 # no newline
            $line =~ s/#.*//;            # no comments
            $line =~ s/^\s+//;           # no leading white
            $line =~ s/\s+$//;           # no trailing white
            next unless length $line;    # anything left?
            my ($var, $value) = split(/\s*=\s*/, $line, 2);
            $config{$var} = $value;
        }
        close($cfgfh);
    } else {
        warn "[!] cannot open $configfile: $!\n";
    }
}
56 |
# Allow single-letter switches to be bundled (e.g. -vv).
Getopt::Long::Configure ("bundling");

# Command-line switches are stored straight into %config; -h calls help().
my $result = GetOptions (
    "a|agent=s" => \$config{'agent'},
    "b|branch=s" => \$config{'branch'},
    "c|checkout!" => \$config{'checkout'},
    "e|redis=s" => \$config{'redis'},
    "g|guess" => \$config{'intguess'},
    "k|session=s" => \$config{'session'},
    "n|newer" => \$config{'newer'},
    "m|mkdir" => \$config{'mkdir'},
    "o|output=s" => \$config{'output'},
    "p|proxy=s" => \$config{'proxy'},
    "r|redirects=i" => \$config{'redirects'},
    "s|sslignore!" => \$config{'sslignore'},
    "t|tasks=i" => \$config{'tasks'},
    "u|url=s" => \$config{'url'},
    "x|brute" => \$config{'brute'},
    "v|verbose+" => \$config{'verbose'},
    "ba|basicauth=s" => \$config{'basicauth'},
    "h|help" => \&help
);

# Files requested unconditionally from the remote .git directory.
my @gitfiles=(
    "COMMIT_EDITMSG",
    "config",
    "description",
    "HEAD",
    "index",
    "packed-refs"
);

# Remember where we started so the script can return there at the end.
my $cwd=cwd();
# Directory-name-safe form of the URL, used by -m.
my $urldir=$config{'url'};
$urldir=~s#[;:&~/]#_#ig;

if ($config{'output'}) {
    my $target = $config{'output'};
    if ($config{'mkdir'}) {
        $target .= "/".$urldir;
        mkdir $target;    # may already exist; chdir reports real problems
    }
    # Without this check a bad output directory would silently make the
    # script write the ripped repository into the current directory.
    chdir $target or die "cannot chdir to $target: $!\n";
}
102 |
my @commits;

# One shared user agent for every request.
my $ua = LWP::UserAgent->new;
$ua->agent($config{agent});
$ua->max_redirect($config{redirects});

# The value given with --basicauth is sent as the Basic credentials.
$ua->default_header('Authorization' => "Basic $config{basicauth}")
    if $config{basicauth};

# Optionally switch off certificate and host-name verification.
$ua->ssl_opts(
    SSL_verify_mode => IO::Socket::SSL::SSL_VERIFY_NONE,
    verify_hostname => 0,
) if $config{sslignore};

# for socks proxy make sure you have LWP::Protocol::socks
$ua->proxy([ 'http', 'https' ], $config{proxy}) if $config{proxy};

# Local git directory, with trailing slash.
my $gd = "$config{gitdir}/";
mkdir $gd;

if ($config{verbose} > 0) {
    print STDERR "[i] Downloading git files from $config{'url'}\n";
}

if ($config{verbose} > 2) {
    print STDERR "[i] Using agent: $config{'agent'}\n",
                 "[i] Using redirects: $config{'redirects'}\n",
                 "[i] Using proxy: $config{'proxy'}\n";
}
133 |
# Soft-404 detection: request a few random (non-existent) file names. If any
# of them fails, the server reports missing files properly. If all succeed,
# the server answers 200 for missing files, and the first
# $config{'resp404size'} bytes of that page are remembered so later requests
# can recognise it.
my @resp404;
my $respdetectmax=$config{'respdetectmax'};
print STDERR "[i] Auto-detecting 404 as 200 with $config{'respdetectmax'} requests\n" if ($config{'verbose'}>0);
$config{'resp404correct'}=0;
for my $probe (1 .. $respdetectmax) {
    my $resp = getreq(randomstr($config{'resp404reqsize'}));
    unless ($resp->is_success) {
        $config{'resp404correct'} = 1;
        last;    # exit loop
    }
    push @resp404, $resp;
}

# With respdetectmax < 1 no probe is sent, so there is no sample page to
# compare against; fall back to trusting the status codes.
if (!$config{'resp404correct'} and !@resp404) {
    print STDERR "[!] No 404 probes were made, assuming correct 404 responses\n";
    $config{'resp404correct'} = 1;
}

if ($config{'resp404correct'}) {
    print STDERR "[i] Getting correct 404 responses\n" if ($config{'verbose'}>0);
} else {
    print STDERR "[i] Getting 200 as 404 responses. Adapting...\n" if ($config{'verbose'}>0);
    my $size = $config{'resp404size'};
    # The first probe's prefix is the reference "not found" page.
    my $reference = substr($resp404[0]->content, 0, $size);
    foreach my $entry (@resp404) {
        next if substr($entry->content, 0, $size) eq $reference;
        print STDERR "[i] 404 responses are different, you will have to customize script source code\n";
        last;    # exit loop
    }
    $config{'resp404content'} = $reference;
}
165 |
# Pick a random session name unless one was configured.
$config{'session'} = randomstr(8) unless $config{'session'};

print STDERR "[i] Using session name: $config{'session'}\n";

# Optional modules: remember whether each one could be loaded.
my $haveredis = eval {
    require Redis;
    Redis->import();
    1;
};

my $havealg = eval {
    require Algorithm::Combinatorics;
    Algorithm::Combinatorics->import(qw(variations_with_repetition permutations));
    1;
};

if ($config{'redis'}) {
    if (!$haveredis) {
        print STDERR "[i] Please install Perl Redis module\n";
    } else {
        # A linked docker container's address overrides the configured server.
        my $dockeraddr = $ENV{'REDIS_PORT_6379_TCP_ADDR'};
        if ($dockeraddr) {
            print STDERR "[i] Detected redis docker environment variable, overriding: $config{'redis'}\n";
            $config{'redis'} = $dockeraddr;
        }
        print STDERR "[i] Using redis: $config{'redis'}\n";
        $config{'redisobj'} = Redis->new(server => $config{'redis'});
        # Redis hash names for object names fetched / not found this session.
        $config{'redis-good'} = "$config{'session'}-good";
        $config{'redis-bad'}  = "$config{'session'}-bad";
    }
}
199 |
200 |
# Fetch the files every git directory is expected to have.
foreach my $file (@gitfiles) {
    getfile($file,$gd.$file);
}

mkdir $gd."logs";
mkdir $gd."logs/refs";
mkdir $gd."logs/refs/heads";
mkdir $gd."logs/refs/remotes";

mkdir $gd."objects";
mkdir $gd."objects/info";
mkdir $gd."objects/pack";

getfile("objects/info/alternates",$gd."objects/info/alternates");

mkdir $gd."info";
getfile("info/grafts",$gd."info/grafts");

my $res = getfile("logs/HEAD",$gd."logs/HEAD");

# The second whitespace-separated field of each reflog line is taken as an
# object name and fetched. The text comes from the remote server, so only
# plain hex object names are accepted before they are used in local paths.
foreach my $line (split /\n/, $res->content) {
    my @fields=split(/\s+/, $line);
    my $ref = $fields[1];
    next unless defined $ref and $ref =~ /\A[0-9a-f]{40,64}\z/;
    getobject($gd,$ref);
}

mkdir $gd."refs";
mkdir $gd."refs/heads";
getfile("refs/heads/".$config{'branch'},$gd."refs/heads/".$config{'branch'});
mkdir $gd."refs/remotes";
mkdir $gd."refs/tags";

# process packs file: objects/info/packs
my $infopacks='objects/info/packs';
$res=getrealreq($infopacks);
if ($res->is_success) {
    print STDERR "[!] found info file for packs, trying to process them: $infopacks\n" if ($config{'verbose'}>0);
    writefile($gd.$infopacks,$res->content);
    foreach my $item (split("\n",$res->content)) {
        print STDERR "[d] processing packs entry: $item\n" if ($config{'verbose'}>1);
        my ($imark,$ifile) = split(" ",$item);
        # Skip blank lines and anything that is not a plain pack file name;
        # the name is server-supplied and ends up in a local path.
        next unless defined $ifile and $ifile =~ /\Apack-[0-9a-f]{40,64}\.pack\z/;
        my $packfn="objects/pack/$ifile";
        getfile($packfn,$gd.$packfn);
        # Each pack has a companion index with the same base name.
        $packfn=~s/\.pack$/.idx/g;
        getfile($packfn,$gd.$packfn);
    }
}
251 |
# Parallel Tasks magic
# With -t N and Parallel::ForkManager available, downloads run in up to N
# child processes; a SysV semaphore and a small shared memory segment carry
# the count of fetched objects back to the parent.
my $haveppf = eval
{
    require Parallel::ForkManager;
    Parallel::ForkManager->import();
    1;
};
my $pm;
my $sem;
my $shm;
my $shmsize=16;    # bytes reserved for the shared counter
if ($config{'tasks'}>0) {
    if ($haveppf) {
        $pm = Parallel::ForkManager->new($config{'tasks'});
        $sem = IPC::Semaphore->new( ftok( $0, 0 ), 1, S_IRWXU | IPC_CREAT );
        if ($sem) {
            $sem->setval(0,0);
            $shm = IPC::SharedMem->new(IPC_PRIVATE, $shmsize, S_IRWXU)
                or die("Error creating IPC shared memory: $!\n");
        } else {
            die("Error creating IPC Semaphore: $!\n");
        }
        print STDERR "[i] Using $config{'tasks'} parallel tasks\n" if ($config{'verbose'}>0);

    } else {
        # Name the module that is actually required above.
        print STDERR "[!] Please install Parallel::ForkManager CPAN module for parallel requests\n";
        $config{'tasks'}=0;
    }
}
280 |
# Repeatedly run "git fsck", download every object it reports as missing, and
# stop once a pass reports nothing or fetches nothing new.
my $pcount=1;
my $fcount=0;
while ($pcount>0) {
    print STDERR "[i] Running git fsck to check for missing items\n" if ($config{'verbose'}>0);
    open(my $fsck, '-|', 'git fsck') or die "cannot find git: $!";
    $pcount=0;
    $fcount=0;
    if ($config{'tasks'}>0) {
        # Semaphore value 1 means "unlocked": a child takes it with -1
        # (blocking while another child holds it) and gives it back with +1.
        $sem->setval(0,1);
        $shm->write($fcount,0,$shmsize);
    }
    while (my $line = <$fsck>) {
        chomp $line;
        next unless $line =~ /^missing/;
        my @getref = split (/\s+/, $line);
        $pcount++;
        if ($config{'tasks'}>0) {
            # Parent: start() returns the child pid, move on to the next line.
            $pm->start() and next;
            my $res = getobject($gd,$getref[2]); # 3rd field is sha1
            if ($res->is_success) {
                $sem->op( 0, -1, SEM_UNDO );    # lock
                $fcount=$shm->read(0, $shmsize);
                $shm->write($fcount+1,0,$shmsize);
                $sem->op( 0, 1, SEM_UNDO );     # unlock
            }
            $pm->finish;
        } else {
            my $res = getobject($gd,$getref[2]); # 3rd field is sha1
            if ($res->is_success) {
                $fcount++;
            }
        }
    }
    if ($config{'tasks'}>0) {
        print STDERR "[i] Waiting for children to finish\n" if ($config{'verbose'}>0);
        $pm->wait_all_children();
        $fcount = $shm->read(0, $shmsize);
    }
    close($fsck);
    print STDERR "[i] Got items with git fsck: $pcount, Items fetched: $fcount\n" if ($config{'verbose'}>0);
    if ($fcount == 0) {
        print STDERR "[!] No more items to fetch. That's it!\n";
        last;
    }
}

# The private shared memory segment is not used after this loop; remove it so
# it does not outlive the script.
$shm->remove if ($shm);
327 |
# Optional pack-name guessing passes (-g and -x).
intguess()   if $config{intguess};
bruteguess() if $config{brute};

if (my $redis = $config{redisobj}) {
    print STDERR "[i] Closing redis connection\n" if $config{verbose} > 0;
    $redis->quit;
}

# Materialise the working tree from what was downloaded.
system("git checkout -f") if $config{checkout};

# Return to the directory the script was started from.
chdir $cwd if $config{output};
348 |
# Try every possible 40-character hex digest as a pack name (option -x).
# Needs Algorithm::Combinatorics; prints a notice and returns if that module
# could not be loaded. Runs in child processes when parallel tasks are on.
sub bruteguess {
    print STDERR "[!] Performing pure brute force guessing of packed refs\n";
    unless ($havealg) {
        print STDERR "[!] Please install Algorithm::Combinatorics CPAN module for brute force guessing\n";
        return;
    }
    # Each hex digit exactly once, so no candidate is generated twice.
    my @digestchars = (0 .. 9, 'a' .. 'f');
    my $iter = Algorithm::Combinatorics::variations_with_repetition(\@digestchars, 40);
    my $pmb;
    if ($config{'tasks'}>0 and $haveppf) {
        $pmb = Parallel::ForkManager->new($config{'tasks'});
    }
    while (my $c = $iter->next) {
        my $p = join '', @{$c};
        print STDERR "[i] Brute forcing digest item: $p \n" if ($config{'verbose'}>0);
        if ($pmb) {
            # Parent: start() returns the child pid, go on with the next one.
            $pmb->start() and next;
            getpackedref($p);
            $pmb->finish;
        } else {
            getpackedref($p);
        }
    }
    if ($pmb) {
        print STDERR "[i] Waiting for children to finish\n" if ($config{'verbose'}>0);
        $pmb->wait_all_children();
    }
    print STDERR "[!] Finished brute force guessing of packed refs. Does world still exists? :)\n";
}
377 |
# get packed refs from given digest
# Downloads the pack and its index for $digest into objects/pack/.
# Git names these files "pack-<digest>.pack" / "pack-<digest>.idx"; the
# prefix comes from $config{'gitpackbasename'} (falling back to "pack").
sub getpackedref {
    my ($digest) = @_;

    my $prefix = defined $config{'gitpackbasename'} ? $config{'gitpackbasename'} : 'pack';
    my $base = "objects/pack/".$prefix."-".$digest;
    getfile($base.".pack",$gd.$base.".pack");
    getfile($base.".idx",$gd.$base.".idx");
}
387 |
# calculate possible digest from array of digests
# $p is an array reference; every element is hashed followed by a newline,
# and the SHA-1 hex digest of the whole sequence is tried as a pack name.
sub getintitem {
    my ($p) = @_;

    my $joined = join '', map { $_ . "\n" } @{$p};
    getpackedref(sha1_hex($joined));
}
399 |
# try to intelligently guess packed refs
# Takes the object names recorded as not found in redis (the fields of the
# $config{'redis-bad'} hash, written by getobject), and tries the digest of
# every permutation of them as a pack name (option -g).
# Needs Algorithm::Combinatorics and a redis connection; prints a notice and
# returns when either is missing or nothing has been recorded.
sub intguess {
    print STDERR "[!] Performing intelligent guessing of packed refs\n";
    unless ($havealg) {
        print STDERR "[!] Please install Algorithm::Combinatorics CPAN module for guessing\n";
        return;
    }
    # $config{'redis-bad'} is only the NAME of the redis hash; the missing
    # object names are its fields.
    my @missingitems;
    if ($config{'redisobj'}) {
        @missingitems = $config{'redisobj'}->hkeys($config{'redis-bad'});
    }
    unless (@missingitems) {
        print STDERR "[!] No missing items recorded in redis, nothing to guess from\n";
        return;
    }
    my $iter = Algorithm::Combinatorics::permutations(\@missingitems);
    my $pmg;
    if ($config{'tasks'}>0 and $haveppf) {
        $pmg = Parallel::ForkManager->new($config{'tasks'});
    }
    while (my $p = $iter->next) {
        print STDERR "[i] Guessing item from permutations\n" if ($config{'verbose'}>0);
        if ($pmg) {
            # Parent: start() returns the child pid, go on with the next one.
            $pmg->start() and next;
            getintitem($p);
            $pmg->finish;
        } else {
            getintitem($p);
        }
    }
    if ($pmg) {
        print STDERR "[i] Waiting for children to finish\n" if ($config{'verbose'}>0);
        $pmg->wait_all_children();
    }
    print STDERR "[!] Finished intelligent guessing of packed refs\n";
}
427 |
# Download the loose object $ref into $gd/objects/<first 2 chars>/<rest>.
#   $gd  - local git directory, with trailing slash
#   $ref - object name as a hex string
# Returns an HTTP::Response; callers test is_success. With redis enabled the
# outcome is cached per session: a name already recorded as bad or good is
# answered with a synthetic 404 / 200 without contacting the server.
# A $ref that is not a plain hex object name yields a synthetic 400 and
# nothing is requested, because names may originate from the remote server
# and are used to build local paths.
sub getobject {
    my ($gd,$ref) = @_;
    unless (defined $ref and $ref =~ /\A[0-9a-fA-F]{40,64}\z/) {
        print STDERR "[!] Ignoring invalid object name\n" if ($config{'verbose'}>0);
        return HTTP::Response->new(400);
    }
    my $rdir = substr ($ref,0,2);
    my $rfile = substr ($ref,2);
    my $redisc;
    if ($config{'redisobj'}) {
        # A connection of its own is opened for this call and closed on
        # every return path.
        $redisc = Redis->new(server => $config{'redis'});
        if ($redisc->hexists($config{'redis-bad'},$ref)) {
            $redisc->quit;
            return HTTP::Response->new(404);
        }
        if ($redisc->hexists($config{'redis-good'},$ref)) {
            $redisc->quit;
            return HTTP::Response->new(200);
        }
        print STDERR "[!] Not found in redis cache: $ref\n" if ($config{'verbose'}>1);
    }
    mkdir $gd."objects/$rdir";
    my $r=getfile("objects/$rdir/$rfile",$gd."objects/$rdir/$rfile");
    if ($redisc) {
        if ($r->is_success) {
            $redisc->hset($config{'redis-good'}, $ref, 200);
        } else {
            $redisc->hset($config{'redis-bad'}, $ref, 404);
        }
        $redisc->quit;
    }
    return $r;
}
459 |
# Issue a GET for $file below the configured base URL and return the
# HTTP::Response as received (no soft-404 filtering, nothing is saved).
sub getreq {
    my ($file) = @_;
    my $target = join '/', $config{'url'}, $file;
    return $ua->request(HTTP::Request->new(GET => $target));
}
468 |
# Like getreq(), but a 200 answer whose leading bytes equal the remembered
# "not found" page is turned into a synthetic 404 response.
sub getrealreq {
    my ($file) = @_;
    my $reply = getreq($file);

    # Real failures, and servers known to send proper 404s, pass through.
    return $reply unless $reply->is_success;
    return $reply if $config{'resp404correct'};

    print STDERR "[d] got 200 for packs but checking content\n" if ($config{'verbose'}>1);
    my $head = substr($reply->content, 0, $config{'resp404size'});
    return $reply if $head ne $config{'resp404content'};

    print STDERR "[!] Not found for: 404 as 200\n"
        if ($config{'verbose'}>0);
    # return not found
    return HTTP::Response->new(404);
}
488 |
# Write $content to $file, replacing any existing file.
# Returns 1 on success and a false value (undef / empty list) when the file
# cannot be opened or the data cannot be written completely.
sub writefile {
    my ($file, $content) = @_;
    open(my $fh, '>', $file) or return;
    binmode($fh);    # store the bytes exactly as received
    my $printed = print {$fh} $content;
    # Buffered write errors only surface at close, so check both.
    my $closed = close($fh);
    return unless $printed and $closed;
    return 1;
}
495 |
# Download $file (relative to the base URL) and store its body in $outfile.
# Returns an HTTP::Response:
#   - a synthetic 200 when -n is set and $outfile already exists (no request)
#   - a synthetic 404 when the server answered 200 with its "not found" page
#   - otherwise the real response; the file is written only on success
# Dies if $outfile cannot be written.
sub getfile {
    my ($file,$outfile) = @_;
    if ($config{'newer'} and -e $outfile) {
        print STDERR "[!] Not overwriting file: $outfile\n" if ($config{'verbose'}>0);
        return HTTP::Response->new(200);
    }
    my $res = getreq($file);
    unless ($res->is_success) {
        print STDERR "[!] Not found for $file: ".$res->status_line."\n"
            if ($config{'verbose'}>0);
        return $res;
    }
    if (not $config{'resp404correct'}) {
        print STDERR "[d] got 200 for $file, but checking content\n" if ($config{'verbose'}>1);
        my $chopresp=substr($res->content,0,$config{'resp404size'});
        if ($chopresp eq $config{'resp404content'}) {
            print STDERR "[!] Not found for $file: 404 as 200\n"
                if ($config{'verbose'}>0);
            return HTTP::Response->new(404);
        }
    }
    print STDERR "[d] found $file\n" if ($config{'verbose'}>0);
    open(my $out, '>', $outfile) or die ("cannot open file $outfile: $!");
    binmode($out);    # git objects and packs are binary
    print {$out} $res->content;
    close($out) or die ("cannot write file $outfile: $!");
    return $res;
}
530 |
# Print usage information to STDOUT and exit with status 0.
# Installed as the -h/--help handler in GetOptions.
sub help {
    print "DVCS-Ripper: rip-git.pl. Copyright (C) Kost. Distributed under GPL.\n\n";
    print "Usage: $0 [options] -u [giturl] \n";
    print "\n";
    print " -c perform 'git checkout -f' on end (default)\n";
    print " -b Use branch (default: $config{'branch'})\n";
    print " -e Use redis server as server:port\n";
    print " -g Try to inteligently guess name of packed refs\n";
    print " -k Use session name for redis (default: random)\n";
    print " -a Use agent (default: $config{'agent'})\n";
    print " -n do not overwrite files\n";
    print " -m mkdir URL name when outputting (works good with -o)\n";
    print " -o specify output dir\n";
    print " -r specify max number of redirects (default: $config{'redirects'})\n";
    print " -s do not verify SSL cert\n";
    print " -t use parallel tasks\n";
    print " -p use proxy for connections\n";
    print " -x brute force packed refs (extremely slow!!)\n";
    print " -v verbose (-vv will be more verbose)\n";
    # With bundling enabled "-ba" is read as "-b a"; the multi-letter name
    # needs the double dash.
    print " --ba set basic auth key\n";
    print "\n";
    print "Example: $0 -v -u http://www.example.com/.git/\n";
    print "Example: $0 # with url and options in $configfile\n";
    # -u takes the next word as its value, so the URL must directly follow it.
    print "Example: $0 -v -p socks://localhost:1080 -u http://www.example.com/.git/\n";
    print "For socks like proxy, make sure you have LWP::Protocol::socks\n";

    exit 0;
}
559 |
--------------------------------------------------------------------------------
/rip-hg.pl:
--------------------------------------------------------------------------------
1 | #!/usr/bin/perl
2 |
3 | use strict;
4 |
5 | use IO::Socket::SSL;
6 | use LWP;
7 | use LWP::UserAgent;
8 | use HTTP::Request;
9 | use HTTP::Response;
10 | use Getopt::Long;
11 |
12 | use File::Path qw(make_path);
13 | use File::Basename;
14 |
15 |
# Per-user settings file; its key = value lines are merged over the defaults.
my $configfile = $ENV{HOME} . '/.rip-hg';

# Built-in defaults.
my %config = (
    'hgdir'    => '.hg',
    'agent'    => 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.7; rv:10.0.2) Gecko/20100101 Firefox/10.0.2',
    'verbose'  => 0,
    'checkout' => 1,

    # Soft-404 detection: number of probe requests, length of the response
    # prefix that is compared, and length of the random probe file name.
    'respdetectmax'  => 3,
    'resp404size'    => 256,
    'resp404reqsize' => 32,
);
26 |
# Produce $wanted random ASCII letters (A-Z, a-z) as one string.
# For $wanted below 1 nothing is appended and undef is returned.
sub randomstr {
    my ($wanted) = @_;
    my @letters = ('A' .. 'Z', 'a' .. 'z');
    my $text;
    foreach my $round (1 .. $wanted) {
        my $index = rand @letters;    # fractional part is dropped by indexing
        $text .= $letters[$index];
    }
    return $text;
}
34 |
# Merge optional per-user settings from $configfile into %config.
# Format: one "key = value" pair per line; "#" starts a comment and blank
# lines are ignored.
if (-e $configfile) {
    # An unreadable file is skipped with a warning. ("next" cannot be used
    # here: this "if" is not a loop block, so it would abort the script.)
    if (open(my $cfgfh, '<', $configfile)) {
        while (my $line = <$cfgfh>) {
            chomp $line;                 # no newline
            $line =~ s/#.*//;            # no comments
            $line =~ s/^\s+//;           # no leading white
            $line =~ s/\s+$//;           # no trailing white
            next unless length $line;    # anything left?
            my ($var, $value) = split(/\s*=\s*/, $line, 2);
            $config{$var} = $value;
        }
        close($cfgfh);
    } else {
        warn "[!] cannot open $configfile: $!\n";
    }
}
48 |
# Allow single-letter switches to be bundled (e.g. -vv).
Getopt::Long::Configure("bundling");

# Command-line switches are stored straight into %config; -h calls help().
my $result = GetOptions(
    'a|agent=s'    => \$config{agent},
    'b|branch=s'   => \$config{branch},
    'u|url=s'      => \$config{url},
    'p|proxy=s'    => \$config{proxy},
    'c|checkout!'  => \$config{checkout},
    's|sslignore!' => \$config{sslignore},
    'v|verbose+'   => \$config{verbose},
    'h|help'       => \&help,
);

# Files requested unconditionally, relative to the remote URL.
my @knownfiles = qw(
    00changelog.i
    dirstate
    requires
    branch
    branchheads.cache
    last-message.txt
    tags.cache
    undo.branch
    undo.desc
    undo.dirstate
    store/00changelog.i
    store/00changelog.d
    store/00manifest.i
    store/00manifest.d
    store/fncache
    store/undo
    .hgignore
);

my $ua = LWP::UserAgent->new;
$ua->agent($config{agent});

# Optionally switch off certificate and host-name verification.
$ua->ssl_opts(
    SSL_verify_mode => IO::Socket::SSL::SSL_VERIFY_NONE,
    verify_hostname => 0,
) if $config{sslignore};

# for socks proxy make sure you have LWP::Protocol::socks
$ua->proxy([ 'http', 'https' ], $config{proxy}) if $config{proxy};

# Local control directory (with trailing slash) and its skeleton.
my $ddir = "$config{hgdir}/";

foreach my $subdir ('', 'store', 'store/data') {
    mkdir $ddir . $subdir;
}
99 | # Every request below is built from the base URL, so refuse to run without one.
100 | die "[!] No URL given (use -u or set url in $configfile)\n" unless ($config{'url'});
101 | print STDERR "[i] Downloading hg files from $config{'url'}\n" if ($config{'verbose'}>0);
102 |
103 | # Probe randomly named files: a server answering them with a success status
104 | # disguises its "not found" pages, and those must then be recognised by content.
105 | my @resp404;
106 | my $respdetectmax=$config{'respdetectmax'};
107 | print STDERR "[i] Auto-detecting 404 as 200 with $config{'respdetectmax'} requests\n" if ($config{'verbose'}>0);
108 | $config{'resp404correct'}=0;
109 | for (my $i=0; $i<$respdetectmax;$i++) {
110 |     my $resp=getreq(randomstr($config{'resp404reqsize'}));
111 |     if (not $resp->is_success) {
112 |         $config{'resp404correct'}=1;    # a real error status was returned
113 |         last; # exit loop
114 |     }
115 |     push @resp404, $resp;
116 | }
117 | # With no successful probe stored (e.g. respdetectmax below 1) nothing can be compared.
118 | $config{'resp404correct'}=1 unless (@resp404);
119 |
120 | if ($config{'resp404correct'}) {
121 |     print STDERR "[i] Getting correct 404 responses\n";
122 | } else {
123 |     print STDERR "[i] Getting 200 as 404 responses. Adapting...\n";
124 |     # The leading bytes of the first probe are the signature getfile() compares with.
125 |     $config{'resp404content'}=substr($resp404[0]->content,0,$config{'resp404size'});
126 |     foreach my $entry (@resp404) {
127 |         next if (substr($entry->content,0,$config{'resp404size'}) eq $config{'resp404content'});
128 |         # probes disagree: the signature will not match every disguised 404
129 |         print STDERR "[i] 404 responses are different, you will have to customize script source code\n";
130 |         last; # exit loop
131 |     }
132 | }
133 |
134 | # Fetch the fixed set of repository metadata files first.
135 | foreach my $file (@knownfiles) {
136 |     getfile($file,$ddir.$file);
137 | }
138 |
139 | print STDERR "[i] Running hg status to check for missing items\n" if ($config{'verbose'}>0);
140 | my @repfiles;
141 | # List-form pipe open runs hg directly, without a shell.
142 | open(my $pipe, '-|', 'hg', 'status', '-A') or die "cannot find hg: $!";
143 | while (my $line = <$pipe>) {
144 |     chomp $line;
145 |     # a status character and a space precede the name; keep the whole rest of
146 |     # the line so that names containing blanks are not cut short
147 |     push @repfiles, substr($line, 2) if (length($line) > 2);
148 | }
149 | close($pipe);
150 | print STDERR "[i] Got items with hg status: ".scalar(@repfiles)."\n" if ($config{'verbose'}>0);
151 |
152 | my $numfiles=0;
153 | foreach my $file (@repfiles) {
154 |     my $dirs = (fileparse($file))[1];
155 |     my $rpath="store/data/".$file;
156 |     make_path($ddir."store/data/".$dirs);
157 |     getfile($rpath.".d",$ddir.$rpath.".d");    # only the .i result decides success
158 |     my $res=getfile($rpath.".i",$ddir.$rpath.".i");
159 |     if ($res->is_success) {
160 |         # names come from the downloaded repository: pass them as a list, never via a shell
161 |         system("hg", "revert", "--", $file) if ($config{'checkout'});
162 |         $numfiles++;
163 |     }
164 | }
165 |
166 | my $maxfiles=scalar(@repfiles);
167 | print STDERR "[i] Finished ($numfiles of $maxfiles)\n";
168 |
169 | # -- END
170 | # Fetch $file relative to the configured base URL.
171 | # Returns the response object of the shared user agent; nothing is written
172 | # to disk, which is why the 404 probing uses this helper.
173 | sub getreq {
174 |     my ($file) = @_;
175 |     my $target = join("/", $config{'url'}, $file);
176 |     # Pass request to the user agent and get a response back
177 |     return $ua->request(HTTP::Request->new(GET => $target));
178 | }
179 | # Download $file (relative to the base URL) and store its body in $outfile.
180 | # Returns the response; is_success is true only when the file was written.
181 | sub getfile {
182 |     my ($file,$outfile) = @_;
183 |     my $furl = $config{'url'}."/".$file;
184 |     my $res = $ua->request(HTTP::Request->new(GET => $furl));
185 |     if (not $res->is_success) {
186 |         print STDERR "[!] Not found for $file: ".$res->status_line."\n" if ($config{'verbose'}>0);
187 |         return $res;
188 |     }
189 |     if (not $config{'resp404correct'}) {
190 |         print STDERR "[d] got 200 for $file, but checking content\n" if ($config{'verbose'}>1);
191 |         if (substr($res->content,0,$config{'resp404size'}) eq $config{'resp404content'}) {
192 |             print STDERR "[!] Not found for $file: 404 as 200\n" if ($config{'verbose'}>0);
193 |             # disguised "not found" page: flag it so callers testing is_success see a miss
194 |             $res->code(404);
195 |             $res->message("Not Found");
196 |             return $res;
197 |         }
198 |     }
199 |     print STDERR "[d] found $file\n" if ($config{'verbose'}>0);
200 |     open(my $out, '>', $outfile) or die ("cannot open file $outfile: $!");
201 |     binmode($out);    # store the body byte-for-byte
202 |     print {$out} $res->content;
203 |     close($out) or die ("cannot write file $outfile: $!");
204 |     return $res;
205 | }
206 | # Print the usage text and exit with status 0; also serves as the -h/--help handler.
207 | sub help {
208 |     my $branch = defined($config{'branch'}) ? $config{'branch'} : '';    # branch may be unset
209 |     print "DVCS-Ripper: rip-hg.pl. Copyright (C) Kost. Distributed under GPL.\n\n";
210 |     print "Usage: $0 [options] -u [hgurl] \n";
211 |     print "\n";
212 |     print " -c perform 'hg revert' on end (default)\n";
213 |     print " -b Use branch (default: $branch)\n";
214 |     print " -a Use agent (default: $config{'agent'})\n";
215 |     print " -s do not verify SSL cert\n";
216 |     print " -p use proxy for connections\n";
217 |     print " -v verbose (-vv will be more verbose)\n";
218 |     print "\n";
219 |     print "Example: $0 -v -u http://www.example.com/.hg/\n";
220 |     print "Example: $0 # with url and options in $configfile\n";
221 |     print "Example: $0 -v -p socks://localhost:1080 -u http://www.example.com/.hg/\n";    # the URL must directly follow -u
222 |     print "For socks like proxy, make sure you have LWP::Protocol::socks\n";
223 |     exit 0;
224 | }
225 |
226 |
--------------------------------------------------------------------------------
/rip-svn.pl:
--------------------------------------------------------------------------------
1 | #!/usr/bin/perl
2 |
3 | use strict;
4 |
5 | use IO::Socket::SSL;
6 | use LWP;
7 | use DBI;
8 | use LWP::UserAgent;
9 | use HTTP::Request;
10 | use Getopt::Long;
11 | my $configfile = "$ENV{HOME}/.rip-svn";    # optional per-user overrides
12 | my %config = (
13 |     'branch'   => "trunk",
14 |     'scmdir'   => ".svn",
15 |     'agent'    => 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.7; rv:10.0.2) Gecko/20100101 Firefox/10.0.2',
16 |     'verbose'  => 0,
17 |     'checkout' => 1,
18 |     'outdir'   => './',
19 |     'upgrade'  => 1,    # run "svn upgrade" before reverting an old-format copy
20 | );
21 | # Merge "key = value" lines from the optional config file into %config.
22 | if (-e $configfile and open(my $cfh, '<', $configfile)) {
23 |     # an unreadable file is skipped; the defaults then stay in effect
24 |     while (<$cfh>) {
25 |         chomp;                  # no newline
26 |         s/#.*//;                # no comments
27 |         s/^\s+//;               # no leading white
28 |         s/\s+$//;               # no trailing white
29 |         next unless length;     # anything left?
30 |         my ($var, $value) = split(/\s*=\s*/, $_, 2);
31 |         $config{$var} = $value;
32 |     }
33 |     close($cfh);
34 | }
35 | # Short options may be bundled (e.g. -vv); parsed values go straight into %config.
36 | Getopt::Long::Configure("bundling");
37 | my %optspec = (
38 |     "a|agent=s"     => \$config{'agent'},
39 |     "b|branch=s"    => \$config{'branch'},
40 |     "u|url=s"       => \$config{'url'},
41 |     "c|checkout!"   => \$config{'checkout'},
42 |     "s|sslignore!"  => \$config{'sslignore'},
43 |     "v|verbose+"    => \$config{'verbose'},
44 |     "h|help"        => \&help,    # prints the usage text and exits
45 | );
46 | my $result = GetOptions(%optspec);
47 | # Administrative files requested from every .svn directory that is visited.
48 | my @scmfiles = qw(
49 |     all-wcprops
50 |     entries
51 |     format
52 |     wc.db
53 | );
54 |
55 | # Debug aid: at verbosity above 3, print the effective configuration.
56 | if ($config{'verbose'}>3) {
57 |     foreach my $name (keys %config) {
58 |         print join(" => ", $name, $config{$name}), "\n";
59 |     }
60 | }
61 | # HTTP client shared by all downloads.
62 | my @commits;
63 | my $ua = LWP::UserAgent->new;
64 | $ua->agent($config{'agent'});
65 | if ($config{'sslignore'}) {
66 |     # -s: accept any certificate and skip the host name check
67 |     $ua->ssl_opts(SSL_verify_mode => IO::Socket::SSL::SSL_VERIFY_NONE, verify_hostname => 0);
68 | }
69 |
70 | # Nothing can be fetched without a base URL (from -u or the config file).
71 | die "[!] No URL given (use -u or set url in $configfile)\n" unless ($config{'url'});
72 | # normalize URL: scmurl names the .svn directory, regurl the directory containing it
73 | $config{'regurl'} = $config{'scmurl'} = $config{'url'};
74 | if ($config{'url'} =~ /\/\.svn/) {
75 |     $config{'regurl'} =~ s/\/\.svn//;
76 | } else {
77 |     $config{'scmurl'} .= "/.svn";
78 | }
79 |
80 | createsvndirs($config{'outdir'});
81 | downloadsvnfiles('',$config{'outdir'});
82 |
83 | my $wcdb = "$config{'scmdir'}/wc.db";
84 | if (-e $wcdb) {
85 |     print STDERR "[i] Found new SVN client storage format!\n";
86 |     # connect to the very file that was just tested for; stop if it cannot be opened
87 |     my $dbh = DBI->connect("dbi:SQLite:dbname=$wcdb","","") or die "cannot open $wcdb: $DBI::errstr\n";
88 |     my $sqlr = 'SELECT id,root,uuid FROM repository';
89 |     my $sth = $dbh->prepare($sqlr) or warn "Couldn't prepare statement '$sqlr': " . $dbh->errstr;
90 |     if ($sth and $sth->execute()) {
91 |         while (my $record = $sth->fetchrow_hashref()) {
92 |             print "REP INFO => $record->{'id'}:$record->{'root'}:$record->{'uuid'}\n";
93 |         }
94 |     }
95 |
96 |     my $sqlp = "select checksum,compression,md5_checksum from pristine";
97 |     my $sthp = $dbh->prepare($sqlp) or warn "Couldn't prepare statement '$sqlp': " . $dbh->errstr;
98 |     if ($sthp and $sthp->execute()) {
99 |         while (my $record = $sthp->fetchrow_hashref()) {
100 |             # the selected column is md5_checksum; the key checksum_md5 never existed
101 |             print "REC INFO => $record->{'checksum'}:$record->{'compression'}:$record->{'md5_checksum'}\n" if ($config{'verbose'}>1);
102 |             if ($record->{'checksum'} =~ /\$sha1\$/) {
103 |                 # drop the 6-character "$sha1$" marker to obtain the object name
104 |                 getobject("$config{'outdir'}/$config{'scmdir'}",substr($record->{'checksum'},6));
105 |             } else {
106 |                 warn("Unknown checksum: $record->{'checksum'}");
107 |             }
108 |         }
109 |     }
110 |     $dbh->disconnect;
111 |     checkout();
112 | } elsif (-e "$config{'scmdir'}/entries") {
113 |     print STDERR "[i] Found old SVN client storage format!\n";
114 |     svnentries('',$config{'outdir'});
115 |     if ($config{'checkout'} and $config{'upgrade'}) {
116 |         print STDERR "[i] Running upgrade, if you get errors, ignore if using older client\n";
117 |         system("svn upgrade");
118 |     }
119 |     checkout();
120 |     print STDERR "[i] Due to limitations, to get full tree - run this utility few times!\n";
121 | } else {
122 |     print STDERR "[i] Could not identify SVN format. Are you sure it's SVN there?\n";
123 |     print STDERR "[i] Anyway, take a look at ".$config{'scmurl'}."/"."entries\n";
124 | }
124 | # Restore the working files with "svn revert -R ." unless checkout is disabled.
125 | sub checkout {
126 |     # --no-checkout: leave the tree untouched
127 |     return $config{'checkout'} unless ($config{'checkout'});
128 |     print STDERR "[i] Trying to revert the tree, if you get error, upgrade your SVN client!\n";
129 |     return system("svn revert -R .");
130 | }
131 | # Create the .svn skeleton below $dir; mkdir results are not checked.
132 | sub createsvndirs {
133 |     my ($dir) = @_;
134 |     my $admin = $dir."/.svn";
135 |     foreach my $path ($admin, "$admin/text-base", "$admin/pristine", "$admin/tmp") {
136 |         mkdir $path;
137 |     }
138 | }
139 | # Fetch every file listed in @scmfiles from $url's admin directory into $dir's.
140 | sub downloadsvnfiles {
141 |     my ($url,$dir) = @_;
142 |     for my $name (@scmfiles) {
143 |         my $rel = join("/", $config{'scmdir'}, $name);
144 |         getfile("$url/$rel", "$dir/$rel");
145 |     }
146 | }
147 | # Download the admin files of $url into $dir, then read the entries file:
148 | # recurse into every listed directory and fetch the text-base of every listed file.
149 | sub svnentries {
150 |     my ($url, $dir) = @_;
151 |
152 |     createsvndirs("$dir");
153 |     my $svnentries = "$dir/$config{'scmdir'}/entries";
154 |     downloadsvnfiles($url,$dir);
155 |
156 |     # lexical handle: each recursion level keeps reading its own entries file
157 |     my $svn;
158 |     if (not open($svn, '<', $svnentries)) {
159 |         warn ("cannot open entries file '$svnentries': $!\n");
160 |         return;
161 |     }
162 |     my $prevline = '';
163 |     while (my $line = <$svn>) {
164 |         chomp $line;
165 |         if ($line eq "dir" and $prevline ne '') {
166 |             # the name is the line before the kind; the directory belongs below $dir
167 |             my $newdir="$dir/$prevline";
168 |             if (not -e $newdir) {
169 |                 mkdir $newdir;
170 |                 svnentries("$url/$prevline",$newdir);
171 |             }
172 |         }
173 |         if ($line eq "file") {
174 |             getfile("$url/.svn/text-base/$prevline.svn-base","$dir/.svn/text-base/$prevline.svn-base");
175 |         }
176 |         $prevline=$line;
177 |     }
178 |     close($svn);
179 | }
180 |
181 | # Fetch the pristine object named $ref into the local admin directory $gd.
182 | sub getobject {
183 |     my ($gd,$ref) = @_;
184 |     # first two chars of sha1 is dirname, whole sha1 plus ".svn-base" is filename
185 |     my $relpath = "pristine/".substr($ref,0,2);
186 |     mkdir "$gd/$relpath";
187 |     getfile("$config{'scmdir'}/$relpath/$ref.svn-base","$gd/$relpath/$ref.svn-base");
188 | }
189 | # Download $file (relative to regurl) and store its body in $outfile.
190 | # Returns the response object; dies when $outfile cannot be written.
191 | sub getfile {
192 |     my ($file,$outfile) = @_;
193 |     my $furl = $config{'regurl'}."/".$file;
194 |     # Pass request to the user agent and get a response back
195 |     my $res = $ua->request(HTTP::Request->new(GET => $furl));
196 |     if ($res->is_success) {
197 |         print STDERR "[d] found $file\n" if ($config{'verbose'}>1);
198 |         open(my $out, '>', $outfile) or die ("cannot open file '$outfile': $!");
199 |         binmode($out);    # store the body byte-for-byte
200 |         print {$out} $res->content;
201 |         close($out) or die ("cannot write file '$outfile': $!");
202 |     } else {
203 |         print STDERR "[!] Not found for $furl => $file: ".$res->status_line."\n" if ($config{'verbose'}>1);
204 |     }
205 |     return $res;
206 | }
207 | # Print the usage text and exit with status 0; also serves as the -h/--help handler.
208 | sub help {
209 |     # the whole text goes out through one print of a list of lines
210 |     print
211 |         "DVCS-Ripper: rip-svn.pl. Copyright (C) Kost. Distributed under GPL.\n\n",
212 |         "Usage: $0 [options] -u [svnurl] \n",
213 |         "\n",
214 |         " -c perform 'checkout' on end (default)\n",
215 |         " -b Use branch (default: $config{'branch'})\n",
216 |         " -a Use agent (default: $config{'agent'})\n",
217 |         " -s ignore SSL certification verification\n",
218 |         " -v verbose (-vv will be more verbose)\n",
219 |         "\n",
220 |         "Example: $0 -v -u http://www.example.com/.svn/\n",
221 |         "Example: $0 # with url and options in $configfile\n";
222 |     exit 0;
223 | }
224 |
225 |
--------------------------------------------------------------------------------