├── .gitconfig
├── .gitignore
├── LICENSE
├── README.md
├── V1_Doc.md
├── docs
├── Makefile
├── conf.py
├── index.rst
├── make.bat
└── pages
│ ├── examples.rst
│ ├── installation.rst
│ └── requirements.rst
├── driver.py
├── fabfile.py
├── parallel_sync
├── __init__.py
├── compression.py
├── downloader.py
├── executor.py
├── rsync.py
└── wget.py
├── requirements.txt
├── setup.cfg
├── setup.py
├── smoke_tests
└── rsync.py
└── tests
└── test_rsync.py
/.gitconfig:
--------------------------------------------------------------------------------
1 | [user]
2 | email = kouroshtheking@gmail.com
3 | name = kouroshparsa
4 |
5 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 |
5 | # C extensions
6 | *.so
7 |
8 | # Distribution / packaging
9 | .Python
10 | env/
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | *.egg-info/
23 | .installed.cfg
24 | *.egg
25 |
26 | # PyInstaller
27 | # Usually these files are written by a python script from a template
28 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
29 | *.manifest
30 | *.spec
31 |
32 | # Installer logs
33 | pip-log.txt
34 | pip-delete-this-directory.txt
35 |
36 | # Unit test / coverage reports
37 | htmlcov/
38 | .tox/
39 | .coverage
40 | .coverage.*
41 | .cache
42 | nosetests.xml
43 | coverage.xml
44 | *,cover
45 |
46 | # Translations
47 | *.mo
48 | *.pot
49 |
50 | # Django stuff:
51 | *.log
52 |
53 | # Sphinx documentation
54 | docs/_build/
55 |
56 | # PyBuilder
57 | target/
58 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | GNU GENERAL PUBLIC LICENSE
2 | Version 2, June 1991
3 |
4 | Copyright (C) 1989, 1991 Free Software Foundation, Inc.,
5 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
6 | Everyone is permitted to copy and distribute verbatim copies
7 | of this license document, but changing it is not allowed.
8 |
9 | Preamble
10 |
11 | The licenses for most software are designed to take away your
12 | freedom to share and change it. By contrast, the GNU General Public
13 | License is intended to guarantee your freedom to share and change free
14 | software--to make sure the software is free for all its users. This
15 | General Public License applies to most of the Free Software
16 | Foundation's software and to any other program whose authors commit to
17 | using it. (Some other Free Software Foundation software is covered by
18 | the GNU Lesser General Public License instead.) You can apply it to
19 | your programs, too.
20 |
21 | When we speak of free software, we are referring to freedom, not
22 | price. Our General Public Licenses are designed to make sure that you
23 | have the freedom to distribute copies of free software (and charge for
24 | this service if you wish), that you receive source code or can get it
25 | if you want it, that you can change the software or use pieces of it
26 | in new free programs; and that you know you can do these things.
27 |
28 | To protect your rights, we need to make restrictions that forbid
29 | anyone to deny you these rights or to ask you to surrender the rights.
30 | These restrictions translate to certain responsibilities for you if you
31 | distribute copies of the software, or if you modify it.
32 |
33 | For example, if you distribute copies of such a program, whether
34 | gratis or for a fee, you must give the recipients all the rights that
35 | you have. You must make sure that they, too, receive or can get the
36 | source code. And you must show them these terms so they know their
37 | rights.
38 |
39 | We protect your rights with two steps: (1) copyright the software, and
40 | (2) offer you this license which gives you legal permission to copy,
41 | distribute and/or modify the software.
42 |
43 | Also, for each author's protection and ours, we want to make certain
44 | that everyone understands that there is no warranty for this free
45 | software. If the software is modified by someone else and passed on, we
46 | want its recipients to know that what they have is not the original, so
47 | that any problems introduced by others will not reflect on the original
48 | authors' reputations.
49 |
50 | Finally, any free program is threatened constantly by software
51 | patents. We wish to avoid the danger that redistributors of a free
52 | program will individually obtain patent licenses, in effect making the
53 | program proprietary. To prevent this, we have made it clear that any
54 | patent must be licensed for everyone's free use or not licensed at all.
55 |
56 | The precise terms and conditions for copying, distribution and
57 | modification follow.
58 |
59 | GNU GENERAL PUBLIC LICENSE
60 | TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
61 |
62 | 0. This License applies to any program or other work which contains
63 | a notice placed by the copyright holder saying it may be distributed
64 | under the terms of this General Public License. The "Program", below,
65 | refers to any such program or work, and a "work based on the Program"
66 | means either the Program or any derivative work under copyright law:
67 | that is to say, a work containing the Program or a portion of it,
68 | either verbatim or with modifications and/or translated into another
69 | language. (Hereinafter, translation is included without limitation in
70 | the term "modification".) Each licensee is addressed as "you".
71 |
72 | Activities other than copying, distribution and modification are not
73 | covered by this License; they are outside its scope. The act of
74 | running the Program is not restricted, and the output from the Program
75 | is covered only if its contents constitute a work based on the
76 | Program (independent of having been made by running the Program).
77 | Whether that is true depends on what the Program does.
78 |
79 | 1. You may copy and distribute verbatim copies of the Program's
80 | source code as you receive it, in any medium, provided that you
81 | conspicuously and appropriately publish on each copy an appropriate
82 | copyright notice and disclaimer of warranty; keep intact all the
83 | notices that refer to this License and to the absence of any warranty;
84 | and give any other recipients of the Program a copy of this License
85 | along with the Program.
86 |
87 | You may charge a fee for the physical act of transferring a copy, and
88 | you may at your option offer warranty protection in exchange for a fee.
89 |
90 | 2. You may modify your copy or copies of the Program or any portion
91 | of it, thus forming a work based on the Program, and copy and
92 | distribute such modifications or work under the terms of Section 1
93 | above, provided that you also meet all of these conditions:
94 |
95 | a) You must cause the modified files to carry prominent notices
96 | stating that you changed the files and the date of any change.
97 |
98 | b) You must cause any work that you distribute or publish, that in
99 | whole or in part contains or is derived from the Program or any
100 | part thereof, to be licensed as a whole at no charge to all third
101 | parties under the terms of this License.
102 |
103 | c) If the modified program normally reads commands interactively
104 | when run, you must cause it, when started running for such
105 | interactive use in the most ordinary way, to print or display an
106 | announcement including an appropriate copyright notice and a
107 | notice that there is no warranty (or else, saying that you provide
108 | a warranty) and that users may redistribute the program under
109 | these conditions, and telling the user how to view a copy of this
110 | License. (Exception: if the Program itself is interactive but
111 | does not normally print such an announcement, your work based on
112 | the Program is not required to print an announcement.)
113 |
114 | These requirements apply to the modified work as a whole. If
115 | identifiable sections of that work are not derived from the Program,
116 | and can be reasonably considered independent and separate works in
117 | themselves, then this License, and its terms, do not apply to those
118 | sections when you distribute them as separate works. But when you
119 | distribute the same sections as part of a whole which is a work based
120 | on the Program, the distribution of the whole must be on the terms of
121 | this License, whose permissions for other licensees extend to the
122 | entire whole, and thus to each and every part regardless of who wrote it.
123 |
124 | Thus, it is not the intent of this section to claim rights or contest
125 | your rights to work written entirely by you; rather, the intent is to
126 | exercise the right to control the distribution of derivative or
127 | collective works based on the Program.
128 |
129 | In addition, mere aggregation of another work not based on the Program
130 | with the Program (or with a work based on the Program) on a volume of
131 | a storage or distribution medium does not bring the other work under
132 | the scope of this License.
133 |
134 | 3. You may copy and distribute the Program (or a work based on it,
135 | under Section 2) in object code or executable form under the terms of
136 | Sections 1 and 2 above provided that you also do one of the following:
137 |
138 | a) Accompany it with the complete corresponding machine-readable
139 | source code, which must be distributed under the terms of Sections
140 | 1 and 2 above on a medium customarily used for software interchange; or,
141 |
142 | b) Accompany it with a written offer, valid for at least three
143 | years, to give any third party, for a charge no more than your
144 | cost of physically performing source distribution, a complete
145 | machine-readable copy of the corresponding source code, to be
146 | distributed under the terms of Sections 1 and 2 above on a medium
147 | customarily used for software interchange; or,
148 |
149 | c) Accompany it with the information you received as to the offer
150 | to distribute corresponding source code. (This alternative is
151 | allowed only for noncommercial distribution and only if you
152 | received the program in object code or executable form with such
153 | an offer, in accord with Subsection b above.)
154 |
155 | The source code for a work means the preferred form of the work for
156 | making modifications to it. For an executable work, complete source
157 | code means all the source code for all modules it contains, plus any
158 | associated interface definition files, plus the scripts used to
159 | control compilation and installation of the executable. However, as a
160 | special exception, the source code distributed need not include
161 | anything that is normally distributed (in either source or binary
162 | form) with the major components (compiler, kernel, and so on) of the
163 | operating system on which the executable runs, unless that component
164 | itself accompanies the executable.
165 |
166 | If distribution of executable or object code is made by offering
167 | access to copy from a designated place, then offering equivalent
168 | access to copy the source code from the same place counts as
169 | distribution of the source code, even though third parties are not
170 | compelled to copy the source along with the object code.
171 |
172 | 4. You may not copy, modify, sublicense, or distribute the Program
173 | except as expressly provided under this License. Any attempt
174 | otherwise to copy, modify, sublicense or distribute the Program is
175 | void, and will automatically terminate your rights under this License.
176 | However, parties who have received copies, or rights, from you under
177 | this License will not have their licenses terminated so long as such
178 | parties remain in full compliance.
179 |
180 | 5. You are not required to accept this License, since you have not
181 | signed it. However, nothing else grants you permission to modify or
182 | distribute the Program or its derivative works. These actions are
183 | prohibited by law if you do not accept this License. Therefore, by
184 | modifying or distributing the Program (or any work based on the
185 | Program), you indicate your acceptance of this License to do so, and
186 | all its terms and conditions for copying, distributing or modifying
187 | the Program or works based on it.
188 |
189 | 6. Each time you redistribute the Program (or any work based on the
190 | Program), the recipient automatically receives a license from the
191 | original licensor to copy, distribute or modify the Program subject to
192 | these terms and conditions. You may not impose any further
193 | restrictions on the recipients' exercise of the rights granted herein.
194 | You are not responsible for enforcing compliance by third parties to
195 | this License.
196 |
197 | 7. If, as a consequence of a court judgment or allegation of patent
198 | infringement or for any other reason (not limited to patent issues),
199 | conditions are imposed on you (whether by court order, agreement or
200 | otherwise) that contradict the conditions of this License, they do not
201 | excuse you from the conditions of this License. If you cannot
202 | distribute so as to satisfy simultaneously your obligations under this
203 | License and any other pertinent obligations, then as a consequence you
204 | may not distribute the Program at all. For example, if a patent
205 | license would not permit royalty-free redistribution of the Program by
206 | all those who receive copies directly or indirectly through you, then
207 | the only way you could satisfy both it and this License would be to
208 | refrain entirely from distribution of the Program.
209 |
210 | If any portion of this section is held invalid or unenforceable under
211 | any particular circumstance, the balance of the section is intended to
212 | apply and the section as a whole is intended to apply in other
213 | circumstances.
214 |
215 | It is not the purpose of this section to induce you to infringe any
216 | patents or other property right claims or to contest validity of any
217 | such claims; this section has the sole purpose of protecting the
218 | integrity of the free software distribution system, which is
219 | implemented by public license practices. Many people have made
220 | generous contributions to the wide range of software distributed
221 | through that system in reliance on consistent application of that
222 | system; it is up to the author/donor to decide if he or she is willing
223 | to distribute software through any other system and a licensee cannot
224 | impose that choice.
225 |
226 | This section is intended to make thoroughly clear what is believed to
227 | be a consequence of the rest of this License.
228 |
229 | 8. If the distribution and/or use of the Program is restricted in
230 | certain countries either by patents or by copyrighted interfaces, the
231 | original copyright holder who places the Program under this License
232 | may add an explicit geographical distribution limitation excluding
233 | those countries, so that distribution is permitted only in or among
234 | countries not thus excluded. In such case, this License incorporates
235 | the limitation as if written in the body of this License.
236 |
237 | 9. The Free Software Foundation may publish revised and/or new versions
238 | of the General Public License from time to time. Such new versions will
239 | be similar in spirit to the present version, but may differ in detail to
240 | address new problems or concerns.
241 |
242 | Each version is given a distinguishing version number. If the Program
243 | specifies a version number of this License which applies to it and "any
244 | later version", you have the option of following the terms and conditions
245 | either of that version or of any later version published by the Free
246 | Software Foundation. If the Program does not specify a version number of
247 | this License, you may choose any version ever published by the Free Software
248 | Foundation.
249 |
250 | 10. If you wish to incorporate parts of the Program into other free
251 | programs whose distribution conditions are different, write to the author
252 | to ask for permission. For software which is copyrighted by the Free
253 | Software Foundation, write to the Free Software Foundation; we sometimes
254 | make exceptions for this. Our decision will be guided by the two goals
255 | of preserving the free status of all derivatives of our free software and
256 | of promoting the sharing and reuse of software generally.
257 |
258 | NO WARRANTY
259 |
260 | 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
261 | FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
262 | OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
263 | PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
264 | OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
265 | MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
266 | TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
267 | PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
268 | REPAIR OR CORRECTION.
269 |
270 | 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
271 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
272 | REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
273 | INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
274 | OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
275 | TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
276 | YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
277 | PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
278 | POSSIBILITY OF SUCH DAMAGES.
279 |
280 | END OF TERMS AND CONDITIONS
281 |
282 | How to Apply These Terms to Your New Programs
283 |
284 | If you develop a new program, and you want it to be of the greatest
285 | possible use to the public, the best way to achieve this is to make it
286 | free software which everyone can redistribute and change under these terms.
287 |
288 | To do so, attach the following notices to the program. It is safest
289 | to attach them to the start of each source file to most effectively
290 | convey the exclusion of warranty; and each file should have at least
291 | the "copyright" line and a pointer to where the full notice is found.
292 |
293 | {description}
294 | Copyright (C) {year} {fullname}
295 |
296 | This program is free software; you can redistribute it and/or modify
297 | it under the terms of the GNU General Public License as published by
298 | the Free Software Foundation; either version 2 of the License, or
299 | (at your option) any later version.
300 |
301 | This program is distributed in the hope that it will be useful,
302 | but WITHOUT ANY WARRANTY; without even the implied warranty of
303 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
304 | GNU General Public License for more details.
305 |
306 | You should have received a copy of the GNU General Public License along
307 | with this program; if not, write to the Free Software Foundation, Inc.,
308 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
309 |
310 | Also add information on how to contact you by electronic and paper mail.
311 |
312 | If the program is interactive, make it output a short notice like this
313 | when it starts in an interactive mode:
314 |
315 | Gnomovision version 69, Copyright (C) year name of author
316 | Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
317 | This is free software, and you are welcome to redistribute it
318 | under certain conditions; type `show c' for details.
319 |
320 | The hypothetical commands `show w' and `show c' should show the appropriate
321 | parts of the General Public License. Of course, the commands you use may
322 | be called something other than `show w' and `show c'; they could even be
323 | mouse-clicks or menu items--whatever suits your program.
324 |
325 | You should also get your employer (if you work as a programmer) or your
326 | school, if any, to sign a "copyright disclaimer" for the program, if
327 | necessary. Here is a sample; alter the names:
328 |
329 | Yoyodyne, Inc., hereby disclaims all copyright interest in the program
330 | `Gnomovision' (which makes passes at compilers) written by James Hacker.
331 |
332 | {signature of Ty Coon}, 1 April 1989
333 | Ty Coon, President of Vice
334 |
335 | This General Public License does not permit incorporating your program into
336 | proprietary programs. If your program is a subroutine library, you may
337 | consider it more useful to permit linking proprietary applications with the
338 | library. If this is what you want to do, use the GNU Lesser General
339 | Public License instead of this License.
340 |
341 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # V2.x parallel_sync Documentation
2 |
3 | Documentation for the older versions of the package is at: V1_Doc
4 |
5 | **Introduction**
6 |
7 | parallel_sync is a python package for uploading or downloading files using multiprocessing and md5 checks. It can do operations such as rsync, scp, wget.
8 | It can be used on Windows, Linux and Mac OS. Note that on Windows, you need to have OpenSSH enabled and the package will automatically use scp instead of rsync.
9 |
10 | **How to install:**
11 |
12 | `pip install parallel_sync`
13 |
14 | **Requirement:**
15 | - Python >= 3
16 | - ssh service must be installed and running.
17 | - if rsync is installed on the local machine, it will be used, otherwise it will fall back to using scp.
18 | - To use the wget method, you need to install wget on the target machine
19 | - To untar/unzip files you need tar/zip packages installed on the target machine
20 |
21 | **Benefits:**
22 | - Very fast file transfer (parallelized)
23 | - If the file exists and is not changed, it will not waste time copying it
24 | - You can specify retries in case you have a bad connection
25 | - It can handle large files
26 |
27 | In most of the examples below, you can specify `parallelism` and `tries` which allow you to parallelize tasks and retry upon failure.
28 | By default `parallelism` is set to 10 workers and tries is 1.
29 |
30 | ## Upstream Example:
31 | ```python
32 | from parallel_sync import rsync, Credential
33 | creds = Credential(username='user',
34 | hostname='192.168.168.9',
35 | port=3022,
36 | key_filename='~/.ssh/id_rsa')
37 | rsync.upload('/tmp/x', '/tmp/y', creds=creds, exclude=['*.pyc', '*.sh'])
38 | ```
39 |
40 | ## Downstream Example:
41 |
42 | ```python
43 | from parallel_sync import rsync
44 | creds = {'user': 'myusername', 'key':'~/.ssh/id_rsa', 'host':'192.168.16.31'}
45 | rsync.download('/tmp/y', '/tmp/z', creds=creds)
46 | ```
47 |
48 | ## Using non-default Ports
49 | ```python
50 | from parallel_sync import rsync, Credential
51 | creds = Credential(username='user',
52 | hostname='192.168.168.9',
53 | port=3022,
54 | key_filename='~/.ssh/id_rsa')
55 | rsync.download('/tmp/y', '/tmp/z', creds=creds)
56 | ```
57 |
58 |
59 | ## Downloading files on a remote machine:
60 |
61 | For this, you need to have wget installed on the remote machine.
62 | ```python
63 | from parallel_sync import wget, Credential
64 | creds = Credential(username='user',
65 | hostname='192.168.168.9',
66 | port=3022,
67 | key_filename='~/.ssh/id_rsa')
68 | urls = ['http://something.png', 'http://somthing.tar.gz', 'http://somthing.zip']
69 | wget.download('/tmp', urls=urls, creds=creds)
70 | ```
71 |
72 | ## Downloading files on the local machine
73 | Downloading files using requests package locally is simple but what if you want to parallelize it?
74 | Here is the solution for that:
75 | ```python
76 | from parallel_sync import downloader
77 | urls = ['http://something1', 'http://somthing2', 'http://somthing3']
78 | downloader.download('c:/temp/x', urls=urls,
79 | extension='.png', parallelism=10)
80 | ```
81 |
82 | ## Integration with Fabric:
83 | ```
84 | from fabric import task
85 | from parallel_sync import rsync, wget, get_fabric_credentials
86 |
87 | @task
88 | def deploy(conn):
89 | creds = get_fabric_credentials(conn)
90 | urls = ['http://something1', 'http://somthing2', 'http://somthing3']
91 | wget.download('/tmp/images', urls=urls, creds=creds)
92 | rsync.upload('/src', '/dst', creds, tries=3)
93 | ```
94 |
95 | Here you have a task called deploy. You can run it using the following command:
96 | ```
97 | fab [user]@[hostname]:[port] -i [path to your key file] deploy
98 | ```
99 |
100 | If you come across any bugs, please report them on GitHub.
101 |
--------------------------------------------------------------------------------
/V1_Doc.md:
--------------------------------------------------------------------------------
1 |
2 | # V1.x parallel_sync Documentation
3 |
4 | **Introduction**
5 |
6 | parallel_sync is a python package for uploading or downloading files using multiprocessing and md5 checks on Linux.
7 | The files can be transferred from a remote linux host or a url.
8 |
9 | **How to install:**
10 |
11 | `pip install parallel_sync`
12 |
13 | **Requirement:**
14 | - Python >= 2.6 Linux Only!
15 | - ssh service must be installed and running.
16 | - To use the rsync features, you need to have rsync installed.
17 | - to use the url module, you need to install wget on the target machine
18 | - To untar/unzip files you need tar/zip packages installed
19 |
20 | **Benefits:**
21 | - Very fast file transfer (parallelized)
22 | - If the file exists and is not changed, it will not waste time copying it
23 | - You can specify retries in case you have a bad connection
24 | - It can handle large files
25 |
26 | In most of the examples below, you can specify `parallelism` and `tries` which allow you to parallelize tasks and retry upon failure.
27 | By default `parallelism` is set to 10 workers.
28 |
29 | ## Upstream Example:
30 | ```python
31 | from parallel_sync import rsync
32 | creds = {'user': 'myusername', 'key':'~/.ssh/id_rsa', 'host':'192.168.16.31'}
33 | rsync.upload('/tmp/x', '/tmp/y', creds=creds, exclude=['*.pyc', '*.sh'])
34 | ```
35 |
36 | ## Downstream Example:
37 |
38 | ```python
39 | from parallel_sync import rsync
40 | creds = {'user': 'myusername', 'key':'~/.ssh/id_rsa', 'host':'192.168.16.31'}
41 | rsync.download('/tmp/y', '/tmp/z', creds=creds)
42 | ```
43 |
44 | ## Using non-default Ports
45 | ```python
46 | from parallel_sync import rsync
47 | creds = {'user': 'myusername', 'key':'~/.ssh/id_rsa', 'host':'192.168.16.31', 'port': 1234}
48 | rsync.download('/tmp/y', '/tmp/z', creds=creds)
49 | ```
50 |
51 |
52 | ## File Download Example:
53 |
54 | ```python
55 | from parallel_sync import wget
56 | urls = ['http://something.png', 'http://somthing.tar.gz', 'http://somthing.zip']
57 | wget.download('/tmp', urls=urls, extract=True)
58 |
59 | # download locally with a specified filename:
60 |
61 | wget.download(LOCAL_TARGET, 'http://something/else/file.zip',\
62 | filenames='x.zip', extract=True)
63 |
64 | # download on a remote machine:
65 |
66 | creds = {'user': 'myusername', 'key':'~/.ssh/id_rsa', 'host':'192.168.16.31'}
67 | wget.download('/tmp', urls=urls, creds=creds)
68 |
69 | # To untar or unzip compressed files after download:
70 | wget.download('/tmp', urls=urls, creds=creds, extract=True)
71 | ```
72 |
73 | Example extracting a file on a remote host:
74 |
75 | ```python
76 | creds = {'user': 'myusername', 'key':'~/.ssh/id_rsa', 'host':'192.168.16.31'}
77 | from parallel_sync import compression
78 | compression.extract('/tmp/x.tar.gz', creds=creds)
79 | ```
80 |
81 | Example checking that a file exists on the remote server:
82 |
83 | ```python
84 | from parallel_sync import executor
85 | creds = {'user': 'myusername', 'key':'~/.ssh/id_rsa', 'host':'192.168.16.31'}
86 | path = '/tmp/myfile'
87 | if executor.path_exists(path, creds):
88 | print("yes")
89 | ```
90 |
91 | Example finding files or directories on a remote server:
92 |
93 | ```python
94 | from parallel_sync import executor
95 | creds = {'user': 'myusername', 'key':'~/.ssh/id_rsa', 'host':'192.168.16.31'}
96 | dir_path = '/tmp/mydir'
97 | files = executor.find_files(dir_path, creds, include=['*.png', '*.jpg'])
98 |
99 | dirs = executor.find_dirs(dir_path, creds, include=['test'])
100 |
101 | # Note that if creds is None, then it will search on localhost
102 | ```
103 |
104 | Example Running commands:
105 |
106 | ```python
107 | from parallel_sync import executor
108 |
109 | cmds = ['mv /tmp/x /tmp/y', 'touch /tmp/z']
110 | creds = {'user': 'myusername', 'key':'~/.ssh/id_rsa', 'host':'192.168.16.31'}
111 | executor.run(cmds, creds=creds, parallelism=len(cmds))
112 |
113 | print(executor.run('pwd', creds=creds, curr_dir='/tmp'))
114 | ```
115 |
116 | Example using parallel_sync within fabric:
117 |
118 | ```python
119 | from fabric.api import env
120 | from parallel_sync import rsync
121 |
122 | rsync.upload('/tmp/x', '/tmp/y', creds=env)
123 | rsync.download('/tmp/y', '/tmp/z', creds=env)
124 | ```
125 |
126 | To transfer files locally:
127 |
128 | ```python
129 | from parallel_sync import rsync
130 | rsync.copy('/tmp/x', '/tmp/y', exclude=['*.pyc'], parallelism=10, extract=False, validate=False)
131 | ```
132 |
133 | where /tmp/x is a directory.
134 |
135 |
136 | If you come across any bugs, please report them on GitHub.
137 |
--------------------------------------------------------------------------------
/docs/Makefile:
--------------------------------------------------------------------------------
1 | # Makefile for Sphinx documentation
2 | #
3 |
4 | # You can set these variables from the command line.
5 | SPHINXOPTS =
6 | SPHINXBUILD = sphinx-build
7 | PAPER =
8 | BUILDDIR = _build
9 |
10 | # User-friendly check for sphinx-build
11 | ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1)
12 | $(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don't have Sphinx installed, grab it from http://sphinx-doc.org/)
13 | endif
14 |
15 | # Internal variables.
16 | PAPEROPT_a4 = -D latex_paper_size=a4
17 | PAPEROPT_letter = -D latex_paper_size=letter
18 | ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .
19 | # the i18n builder cannot share the environment and doctrees with the others
20 | I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .
21 |
22 | .PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest coverage gettext
23 |
24 | help:
25 | @echo "Please use \`make <target>' where <target> is one of"
26 | @echo " html to make standalone HTML files"
27 | @echo " dirhtml to make HTML files named index.html in directories"
28 | @echo " singlehtml to make a single large HTML file"
29 | @echo " pickle to make pickle files"
30 | @echo " json to make JSON files"
31 | @echo " htmlhelp to make HTML files and a HTML help project"
32 | @echo " qthelp to make HTML files and a qthelp project"
33 | @echo " applehelp to make an Apple Help Book"
34 | @echo " devhelp to make HTML files and a Devhelp project"
35 | @echo " epub to make an epub"
36 | @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter"
37 | @echo " latexpdf to make LaTeX files and run them through pdflatex"
38 | @echo " latexpdfja to make LaTeX files and run them through platex/dvipdfmx"
39 | @echo " text to make text files"
40 | @echo " man to make manual pages"
41 | @echo " texinfo to make Texinfo files"
42 | @echo " info to make Texinfo files and run them through makeinfo"
43 | @echo " gettext to make PO message catalogs"
44 | @echo " changes to make an overview of all changed/added/deprecated items"
45 | @echo " xml to make Docutils-native XML files"
46 | @echo " pseudoxml to make pseudoxml-XML files for display purposes"
47 | @echo " linkcheck to check all external links for integrity"
48 | @echo " doctest to run all doctests embedded in the documentation (if enabled)"
49 | @echo " coverage to run coverage check of the documentation (if enabled)"
50 |
51 | clean:
52 | rm -rf $(BUILDDIR)/*
53 |
54 | html:
55 | $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html
56 | @echo
57 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/html."
58 |
59 | dirhtml:
60 | $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml
61 | @echo
62 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml."
63 |
64 | singlehtml:
65 | $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml
66 | @echo
67 | @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml."
68 |
69 | pickle:
70 | $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle
71 | @echo
72 | @echo "Build finished; now you can process the pickle files."
73 |
74 | json:
75 | $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json
76 | @echo
77 | @echo "Build finished; now you can process the JSON files."
78 |
79 | htmlhelp:
80 | $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp
81 | @echo
82 | @echo "Build finished; now you can run HTML Help Workshop with the" \
83 | ".hhp project file in $(BUILDDIR)/htmlhelp."
84 |
85 | qthelp:
86 | $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp
87 | @echo
88 | @echo "Build finished; now you can run "qcollectiongenerator" with the" \
89 | ".qhcp project file in $(BUILDDIR)/qthelp, like this:"
90 | @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/parallel_sync.qhcp"
91 | @echo "To view the help file:"
92 | @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/parallel_sync.qhc"
93 |
94 | applehelp:
95 | $(SPHINXBUILD) -b applehelp $(ALLSPHINXOPTS) $(BUILDDIR)/applehelp
96 | @echo
97 | @echo "Build finished. The help book is in $(BUILDDIR)/applehelp."
98 | @echo "N.B. You won't be able to view it unless you put it in" \
99 | "~/Library/Documentation/Help or install it in your application" \
100 | "bundle."
101 |
102 | devhelp:
103 | $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp
104 | @echo
105 | @echo "Build finished."
106 | @echo "To view the help file:"
107 | @echo "# mkdir -p $$HOME/.local/share/devhelp/parallel_sync"
108 | @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/parallel_sync"
109 | @echo "# devhelp"
110 |
111 | epub:
112 | $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub
113 | @echo
114 | @echo "Build finished. The epub file is in $(BUILDDIR)/epub."
115 |
116 | latex:
117 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
118 | @echo
119 | @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex."
120 | @echo "Run \`make' in that directory to run these through (pdf)latex" \
121 | "(use \`make latexpdf' here to do that automatically)."
122 |
123 | latexpdf:
124 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
125 | @echo "Running LaTeX files through pdflatex..."
126 | $(MAKE) -C $(BUILDDIR)/latex all-pdf
127 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."
128 |
129 | latexpdfja:
130 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
131 | @echo "Running LaTeX files through platex and dvipdfmx..."
132 | $(MAKE) -C $(BUILDDIR)/latex all-pdf-ja
133 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."
134 |
135 | text:
136 | $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text
137 | @echo
138 | @echo "Build finished. The text files are in $(BUILDDIR)/text."
139 |
140 | man:
141 | $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man
142 | @echo
143 | @echo "Build finished. The manual pages are in $(BUILDDIR)/man."
144 |
145 | texinfo:
146 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
147 | @echo
148 | @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo."
149 | @echo "Run \`make' in that directory to run these through makeinfo" \
150 | "(use \`make info' here to do that automatically)."
151 |
152 | info:
153 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
154 | @echo "Running Texinfo files through makeinfo..."
155 | make -C $(BUILDDIR)/texinfo info
156 | @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo."
157 |
158 | gettext:
159 | $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale
160 | @echo
161 | @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale."
162 |
163 | changes:
164 | $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes
165 | @echo
166 | @echo "The overview file is in $(BUILDDIR)/changes."
167 |
168 | linkcheck:
169 | $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck
170 | @echo
171 | @echo "Link check complete; look for any errors in the above output " \
172 | "or in $(BUILDDIR)/linkcheck/output.txt."
173 |
174 | doctest:
175 | $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest
176 | @echo "Testing of doctests in the sources finished, look at the " \
177 | "results in $(BUILDDIR)/doctest/output.txt."
178 |
179 | coverage:
180 | $(SPHINXBUILD) -b coverage $(ALLSPHINXOPTS) $(BUILDDIR)/coverage
181 | @echo "Testing of coverage in the sources finished, look at the " \
182 | "results in $(BUILDDIR)/coverage/python.txt."
183 |
184 | xml:
185 | $(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml
186 | @echo
187 | @echo "Build finished. The XML files are in $(BUILDDIR)/xml."
188 |
189 | pseudoxml:
190 | $(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml
191 | @echo
192 | @echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml."
193 |
--------------------------------------------------------------------------------
/docs/conf.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | #
3 | # parallel_sync documentation build configuration file, created by
4 | # sphinx-quickstart on Mon Oct 26 12:02:00 2015.
5 | #
6 | # This file is execfile()d with the current directory set to its
7 | # containing dir.
8 | #
9 | # Note that not all possible configuration values are present in this
10 | # autogenerated file.
11 | #
12 | # All configuration values have a default; values that are commented out
13 | # serve to show the default.
14 |
15 | import sys
16 | import os
17 | import shlex
18 |
19 | # If extensions (or modules to document with autodoc) are in another directory,
20 | # add these directories to sys.path here. If the directory is relative to the
21 | # documentation root, use os.path.abspath to make it absolute, like shown here.
22 | #sys.path.insert(0, os.path.abspath('.'))
23 |
24 | # -- General configuration ------------------------------------------------
25 |
26 | # If your documentation needs a minimal Sphinx version, state it here.
27 | #needs_sphinx = '1.0'
28 |
29 | # Add any Sphinx extension module names here, as strings. They can be
30 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
31 | # ones.
32 | extensions = [
33 | 'sphinx.ext.autodoc',
34 | ]
35 |
36 | # Add any paths that contain templates here, relative to this directory.
37 | templates_path = ['_templates']
38 |
39 | # The suffix(es) of source filenames.
40 | # You can specify multiple suffix as a list of string:
41 | # source_suffix = ['.rst', '.md']
42 | source_suffix = '.rst'
43 |
44 | # The encoding of source files.
45 | #source_encoding = 'utf-8-sig'
46 |
47 | # The master toctree document.
48 | master_doc = 'index'
49 |
50 | # General information about the project.
51 | project = u'parallel_sync'
52 | copyright = u'2015, Kourosh Parsa'
53 | author = u'Kourosh Parsa'
54 |
55 | # The version info for the project you're documenting, acts as replacement for
56 | # |version| and |release|, also used in various other places throughout the
57 | # built documents.
58 | #
59 | # The short X.Y version.
60 | version = '1.5'
61 | # The full version, including alpha/beta/rc tags.
62 | release = '1.5'
63 |
64 | # The language for content autogenerated by Sphinx. Refer to documentation
65 | # for a list of supported languages.
66 | #
67 | # This is also used if you do content translation via gettext catalogs.
68 | # Usually you set "language" from the command line for these cases.
69 | language = None
70 |
71 | # There are two options for replacing |today|: either, you set today to some
72 | # non-false value, then it is used:
73 | #today = ''
74 | # Else, today_fmt is used as the format for a strftime call.
75 | #today_fmt = '%B %d, %Y'
76 |
77 | # List of patterns, relative to source directory, that match files and
78 | # directories to ignore when looking for source files.
79 | exclude_patterns = ['_build']
80 |
81 | # The reST default role (used for this markup: `text`) to use for all
82 | # documents.
83 | #default_role = None
84 |
85 | # If true, '()' will be appended to :func: etc. cross-reference text.
86 | #add_function_parentheses = True
87 |
88 | # If true, the current module name will be prepended to all description
89 | # unit titles (such as .. function::).
90 | #add_module_names = True
91 |
92 | # If true, sectionauthor and moduleauthor directives will be shown in the
93 | # output. They are ignored by default.
94 | #show_authors = False
95 |
96 | # The name of the Pygments (syntax highlighting) style to use.
97 | pygments_style = 'sphinx'
98 |
99 | # A list of ignored prefixes for module index sorting.
100 | #modindex_common_prefix = []
101 |
102 | # If true, keep warnings as "system message" paragraphs in the built documents.
103 | #keep_warnings = False
104 |
105 | # If true, `todo` and `todoList` produce output, else they produce nothing.
106 | todo_include_todos = False
107 |
108 |
109 | # -- Options for HTML output ----------------------------------------------
110 |
111 | # The theme to use for HTML and HTML Help pages. See the documentation for
112 | # a list of builtin themes.
113 | html_theme = 'alabaster'
114 |
115 | # Theme options are theme-specific and customize the look and feel of a theme
116 | # further. For a list of options available for each theme, see the
117 | # documentation.
118 | #html_theme_options = {}
119 |
120 | # Add any paths that contain custom themes here, relative to this directory.
121 | #html_theme_path = []
122 |
123 | # The name for this set of Sphinx documents. If None, it defaults to
124 | # "<project> v<release> documentation".
125 | #html_title = None
126 |
127 | # A shorter title for the navigation bar. Default is the same as html_title.
128 | #html_short_title = None
129 |
130 | # The name of an image file (relative to this directory) to place at the top
131 | # of the sidebar.
132 | #html_logo = None
133 |
134 | # The name of an image file (within the static path) to use as favicon of the
135 | # docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32
136 | # pixels large.
137 | #html_favicon = None
138 |
139 | # Add any paths that contain custom static files (such as style sheets) here,
140 | # relative to this directory. They are copied after the builtin static files,
141 | # so a file named "default.css" will overwrite the builtin "default.css".
142 | html_static_path = ['_static']
143 |
144 | # Add any extra paths that contain custom files (such as robots.txt or
145 | # .htaccess) here, relative to this directory. These files are copied
146 | # directly to the root of the documentation.
147 | #html_extra_path = []
148 |
149 | # If not '', a 'Last updated on:' timestamp is inserted at every page bottom,
150 | # using the given strftime format.
151 | #html_last_updated_fmt = '%b %d, %Y'
152 |
153 | # If true, SmartyPants will be used to convert quotes and dashes to
154 | # typographically correct entities.
155 | #html_use_smartypants = True
156 |
157 | # Custom sidebar templates, maps document names to template names.
158 | #html_sidebars = {}
159 |
160 | # Additional templates that should be rendered to pages, maps page names to
161 | # template names.
162 | #html_additional_pages = {}
163 |
164 | # If false, no module index is generated.
165 | #html_domain_indices = True
166 |
167 | # If false, no index is generated.
168 | #html_use_index = True
169 |
170 | # If true, the index is split into individual pages for each letter.
171 | #html_split_index = False
172 |
173 | # If true, links to the reST sources are added to the pages.
174 | #html_show_sourcelink = True
175 |
176 | # If true, "Created using Sphinx" is shown in the HTML footer. Default is True.
177 | #html_show_sphinx = True
178 |
179 | # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True.
180 | #html_show_copyright = True
181 |
182 | # If true, an OpenSearch description file will be output, and all pages will
183 | # contain a <link> tag referring to it. The value of this option must be the
184 | # base URL from which the finished HTML is served.
185 | #html_use_opensearch = ''
186 |
187 | # This is the file name suffix for HTML files (e.g. ".xhtml").
188 | #html_file_suffix = None
189 |
190 | # Language to be used for generating the HTML full-text search index.
191 | # Sphinx supports the following languages:
192 | # 'da', 'de', 'en', 'es', 'fi', 'fr', 'hu', 'it', 'ja'
193 | # 'nl', 'no', 'pt', 'ro', 'ru', 'sv', 'tr'
194 | #html_search_language = 'en'
195 |
196 | # A dictionary with options for the search language support, empty by default.
197 | # Now only 'ja' uses this config value
198 | #html_search_options = {'type': 'default'}
199 |
200 | # The name of a javascript file (relative to the configuration directory) that
201 | # implements a search results scorer. If empty, the default will be used.
202 | #html_search_scorer = 'scorer.js'
203 |
204 | # Output file base name for HTML help builder.
205 | htmlhelp_basename = 'parallel_syncdoc'
206 |
207 | # -- Options for LaTeX output ---------------------------------------------
208 |
209 | latex_elements = {
210 | # The paper size ('letterpaper' or 'a4paper').
211 | #'papersize': 'letterpaper',
212 |
213 | # The font size ('10pt', '11pt' or '12pt').
214 | #'pointsize': '10pt',
215 |
216 | # Additional stuff for the LaTeX preamble.
217 | #'preamble': '',
218 |
219 | # Latex figure (float) alignment
220 | #'figure_align': 'htbp',
221 | }
222 |
223 | # Grouping the document tree into LaTeX files. List of tuples
224 | # (source start file, target name, title,
225 | # author, documentclass [howto, manual, or own class]).
226 | latex_documents = [
227 | (master_doc, 'parallel_sync.tex', u'parallel\\_sync Documentation',
228 | u'Kourosh Parsa', 'manual'),
229 | ]
230 |
231 | # The name of an image file (relative to this directory) to place at the top of
232 | # the title page.
233 | #latex_logo = None
234 |
235 | # For "manual" documents, if this is true, then toplevel headings are parts,
236 | # not chapters.
237 | #latex_use_parts = False
238 |
239 | # If true, show page references after internal links.
240 | #latex_show_pagerefs = False
241 |
242 | # If true, show URL addresses after external links.
243 | #latex_show_urls = False
244 |
245 | # Documents to append as an appendix to all manuals.
246 | #latex_appendices = []
247 |
248 | # If false, no module index is generated.
249 | #latex_domain_indices = True
250 |
251 |
252 | # -- Options for manual page output ---------------------------------------
253 |
254 | # One entry per manual page. List of tuples
255 | # (source start file, name, description, authors, manual section).
256 | man_pages = [
257 | (master_doc, 'parallel_sync', u'parallel_sync Documentation',
258 | [author], 1)
259 | ]
260 |
261 | # If true, show URL addresses after external links.
262 | #man_show_urls = False
263 |
264 |
265 | # -- Options for Texinfo output -------------------------------------------
266 |
267 | # Grouping the document tree into Texinfo files. List of tuples
268 | # (source start file, target name, title, author,
269 | # dir menu entry, description, category)
270 | texinfo_documents = [
271 | (master_doc, 'parallel_sync', u'parallel_sync Documentation',
272 | author, 'parallel_sync', 'One line description of project.',
273 | 'Miscellaneous'),
274 | ]
275 |
276 | # Documents to append as an appendix to all manuals.
277 | #texinfo_appendices = []
278 |
279 | # If false, no module index is generated.
280 | #texinfo_domain_indices = True
281 |
282 | # How to display URL addresses: 'footnote', 'no', or 'inline'.
283 | #texinfo_show_urls = 'footnote'
284 |
285 | # If true, do not generate a @detailmenu in the "Top" node's menu.
286 | #texinfo_no_detailmenu = False
287 |
--------------------------------------------------------------------------------
/docs/index.rst:
--------------------------------------------------------------------------------
1 | .. parallel_sync documentation master file, created by
2 | sphinx-quickstart on Mon Oct 26 12:02:00 2015.
3 | You can adapt this file completely to your liking, but it should at least
4 | contain the root `toctree` directive.
5 |
6 | Welcome to parallel_sync's documentation!
7 | =========================================
8 |
9 | Contents:
10 |
11 | .. toctree::
12 | :maxdepth: 2
13 |
14 | pages/requirements
15 | pages/installation
16 | pages/examples
17 |
18 | Indices and tables
19 | ==================
20 |
21 | * :ref:`genindex`
22 | * :ref:`modindex`
23 | * :ref:`search`
24 |
25 |
--------------------------------------------------------------------------------
/docs/make.bat:
--------------------------------------------------------------------------------
1 | @ECHO OFF
2 |
3 | REM Command file for Sphinx documentation
4 |
5 | if "%SPHINXBUILD%" == "" (
6 | set SPHINXBUILD=sphinx-build
7 | )
8 | set BUILDDIR=_build
9 | set ALLSPHINXOPTS=-d %BUILDDIR%/doctrees %SPHINXOPTS% .
10 | set I18NSPHINXOPTS=%SPHINXOPTS% .
11 | if NOT "%PAPER%" == "" (
12 | set ALLSPHINXOPTS=-D latex_paper_size=%PAPER% %ALLSPHINXOPTS%
13 | set I18NSPHINXOPTS=-D latex_paper_size=%PAPER% %I18NSPHINXOPTS%
14 | )
15 |
16 | if "%1" == "" goto help
17 |
18 | if "%1" == "help" (
19 | :help
20 | echo.Please use `make ^<target^>` where ^<target^> is one of
21 | echo. html to make standalone HTML files
22 | echo. dirhtml to make HTML files named index.html in directories
23 | echo. singlehtml to make a single large HTML file
24 | echo. pickle to make pickle files
25 | echo. json to make JSON files
26 | echo. htmlhelp to make HTML files and a HTML help project
27 | echo. qthelp to make HTML files and a qthelp project
28 | echo. devhelp to make HTML files and a Devhelp project
29 | echo. epub to make an epub
30 | echo. latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter
31 | echo. text to make text files
32 | echo. man to make manual pages
33 | echo. texinfo to make Texinfo files
34 | echo. gettext to make PO message catalogs
35 | echo. changes to make an overview over all changed/added/deprecated items
36 | echo. xml to make Docutils-native XML files
37 | echo. pseudoxml to make pseudoxml-XML files for display purposes
38 | echo. linkcheck to check all external links for integrity
39 | echo. doctest to run all doctests embedded in the documentation if enabled
40 | echo. coverage to run coverage check of the documentation if enabled
41 | goto end
42 | )
43 |
44 | if "%1" == "clean" (
45 | for /d %%i in (%BUILDDIR%\*) do rmdir /q /s %%i
46 | del /q /s %BUILDDIR%\*
47 | goto end
48 | )
49 |
50 |
51 | REM Check if sphinx-build is available and fallback to Python version if any
52 | %SPHINXBUILD% 2> nul
53 | if errorlevel 9009 goto sphinx_python
54 | goto sphinx_ok
55 |
56 | :sphinx_python
57 |
58 | set SPHINXBUILD=python -m sphinx.__init__
59 | %SPHINXBUILD% 2> nul
60 | if errorlevel 9009 (
61 | echo.
62 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
63 | echo.installed, then set the SPHINXBUILD environment variable to point
64 | echo.to the full path of the 'sphinx-build' executable. Alternatively you
65 | echo.may add the Sphinx directory to PATH.
66 | echo.
67 | echo.If you don't have Sphinx installed, grab it from
68 | echo.http://sphinx-doc.org/
69 | exit /b 1
70 | )
71 |
72 | :sphinx_ok
73 |
74 |
75 | if "%1" == "html" (
76 | %SPHINXBUILD% -b html %ALLSPHINXOPTS% %BUILDDIR%/html
77 | if errorlevel 1 exit /b 1
78 | echo.
79 | echo.Build finished. The HTML pages are in %BUILDDIR%/html.
80 | goto end
81 | )
82 |
83 | if "%1" == "dirhtml" (
84 | %SPHINXBUILD% -b dirhtml %ALLSPHINXOPTS% %BUILDDIR%/dirhtml
85 | if errorlevel 1 exit /b 1
86 | echo.
87 | echo.Build finished. The HTML pages are in %BUILDDIR%/dirhtml.
88 | goto end
89 | )
90 |
91 | if "%1" == "singlehtml" (
92 | %SPHINXBUILD% -b singlehtml %ALLSPHINXOPTS% %BUILDDIR%/singlehtml
93 | if errorlevel 1 exit /b 1
94 | echo.
95 | echo.Build finished. The HTML pages are in %BUILDDIR%/singlehtml.
96 | goto end
97 | )
98 |
99 | if "%1" == "pickle" (
100 | %SPHINXBUILD% -b pickle %ALLSPHINXOPTS% %BUILDDIR%/pickle
101 | if errorlevel 1 exit /b 1
102 | echo.
103 | echo.Build finished; now you can process the pickle files.
104 | goto end
105 | )
106 |
107 | if "%1" == "json" (
108 | %SPHINXBUILD% -b json %ALLSPHINXOPTS% %BUILDDIR%/json
109 | if errorlevel 1 exit /b 1
110 | echo.
111 | echo.Build finished; now you can process the JSON files.
112 | goto end
113 | )
114 |
115 | if "%1" == "htmlhelp" (
116 | %SPHINXBUILD% -b htmlhelp %ALLSPHINXOPTS% %BUILDDIR%/htmlhelp
117 | if errorlevel 1 exit /b 1
118 | echo.
119 | echo.Build finished; now you can run HTML Help Workshop with the ^
120 | .hhp project file in %BUILDDIR%/htmlhelp.
121 | goto end
122 | )
123 |
124 | if "%1" == "qthelp" (
125 | %SPHINXBUILD% -b qthelp %ALLSPHINXOPTS% %BUILDDIR%/qthelp
126 | if errorlevel 1 exit /b 1
127 | echo.
128 | echo.Build finished; now you can run "qcollectiongenerator" with the ^
129 | .qhcp project file in %BUILDDIR%/qthelp, like this:
130 | echo.^> qcollectiongenerator %BUILDDIR%\qthelp\parallel_sync.qhcp
131 | echo.To view the help file:
132 | echo.^> assistant -collectionFile %BUILDDIR%\qthelp\parallel_sync.qhc
133 | goto end
134 | )
135 |
136 | if "%1" == "devhelp" (
137 | %SPHINXBUILD% -b devhelp %ALLSPHINXOPTS% %BUILDDIR%/devhelp
138 | if errorlevel 1 exit /b 1
139 | echo.
140 | echo.Build finished.
141 | goto end
142 | )
143 |
144 | if "%1" == "epub" (
145 | %SPHINXBUILD% -b epub %ALLSPHINXOPTS% %BUILDDIR%/epub
146 | if errorlevel 1 exit /b 1
147 | echo.
148 | echo.Build finished. The epub file is in %BUILDDIR%/epub.
149 | goto end
150 | )
151 |
152 | if "%1" == "latex" (
153 | %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex
154 | if errorlevel 1 exit /b 1
155 | echo.
156 | echo.Build finished; the LaTeX files are in %BUILDDIR%/latex.
157 | goto end
158 | )
159 |
160 | if "%1" == "latexpdf" (
161 | %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex
162 | cd %BUILDDIR%/latex
163 | make all-pdf
164 | cd %~dp0
165 | echo.
166 | echo.Build finished; the PDF files are in %BUILDDIR%/latex.
167 | goto end
168 | )
169 |
170 | if "%1" == "latexpdfja" (
171 | %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex
172 | cd %BUILDDIR%/latex
173 | make all-pdf-ja
174 | cd %~dp0
175 | echo.
176 | echo.Build finished; the PDF files are in %BUILDDIR%/latex.
177 | goto end
178 | )
179 |
180 | if "%1" == "text" (
181 | %SPHINXBUILD% -b text %ALLSPHINXOPTS% %BUILDDIR%/text
182 | if errorlevel 1 exit /b 1
183 | echo.
184 | echo.Build finished. The text files are in %BUILDDIR%/text.
185 | goto end
186 | )
187 |
188 | if "%1" == "man" (
189 | %SPHINXBUILD% -b man %ALLSPHINXOPTS% %BUILDDIR%/man
190 | if errorlevel 1 exit /b 1
191 | echo.
192 | echo.Build finished. The manual pages are in %BUILDDIR%/man.
193 | goto end
194 | )
195 |
196 | if "%1" == "texinfo" (
197 | %SPHINXBUILD% -b texinfo %ALLSPHINXOPTS% %BUILDDIR%/texinfo
198 | if errorlevel 1 exit /b 1
199 | echo.
200 | echo.Build finished. The Texinfo files are in %BUILDDIR%/texinfo.
201 | goto end
202 | )
203 |
204 | if "%1" == "gettext" (
205 | %SPHINXBUILD% -b gettext %I18NSPHINXOPTS% %BUILDDIR%/locale
206 | if errorlevel 1 exit /b 1
207 | echo.
208 | echo.Build finished. The message catalogs are in %BUILDDIR%/locale.
209 | goto end
210 | )
211 |
212 | if "%1" == "changes" (
213 | %SPHINXBUILD% -b changes %ALLSPHINXOPTS% %BUILDDIR%/changes
214 | if errorlevel 1 exit /b 1
215 | echo.
216 | echo.The overview file is in %BUILDDIR%/changes.
217 | goto end
218 | )
219 |
220 | if "%1" == "linkcheck" (
221 | %SPHINXBUILD% -b linkcheck %ALLSPHINXOPTS% %BUILDDIR%/linkcheck
222 | if errorlevel 1 exit /b 1
223 | echo.
224 | echo.Link check complete; look for any errors in the above output ^
225 | or in %BUILDDIR%/linkcheck/output.txt.
226 | goto end
227 | )
228 |
229 | if "%1" == "doctest" (
230 | %SPHINXBUILD% -b doctest %ALLSPHINXOPTS% %BUILDDIR%/doctest
231 | if errorlevel 1 exit /b 1
232 | echo.
233 | echo.Testing of doctests in the sources finished, look at the ^
234 | results in %BUILDDIR%/doctest/output.txt.
235 | goto end
236 | )
237 |
238 | if "%1" == "coverage" (
239 | %SPHINXBUILD% -b coverage %ALLSPHINXOPTS% %BUILDDIR%/coverage
240 | if errorlevel 1 exit /b 1
241 | echo.
242 | echo.Testing of coverage in the sources finished, look at the ^
243 | results in %BUILDDIR%/coverage/python.txt.
244 | goto end
245 | )
246 |
247 | if "%1" == "xml" (
248 | %SPHINXBUILD% -b xml %ALLSPHINXOPTS% %BUILDDIR%/xml
249 | if errorlevel 1 exit /b 1
250 | echo.
251 | echo.Build finished. The XML files are in %BUILDDIR%/xml.
252 | goto end
253 | )
254 |
255 | if "%1" == "pseudoxml" (
256 | %SPHINXBUILD% -b pseudoxml %ALLSPHINXOPTS% %BUILDDIR%/pseudoxml
257 | if errorlevel 1 exit /b 1
258 | echo.
259 | echo.Build finished. The pseudo-XML files are in %BUILDDIR%/pseudoxml.
260 | goto end
261 | )
262 |
263 | :end
264 |
--------------------------------------------------------------------------------
/docs/pages/examples.rst:
--------------------------------------------------------------------------------
1 | parallel_sync Usage
2 | ===================
3 |
4 | parallel_sync is a python package for uploading or downloading files using multiprocessing and md5 checks on Linux.
5 | The files can be transferred from a remote linux host or a url.
6 |
7 | Benefits:
8 | * Very fast file transfer (parallelized)
9 | * If the file exists and is not changed, it will not waste time copying it
10 | * You can specify retries in case you have a bad connection
11 | * It can handle large files
12 |
13 | In most of the examples below, you can specify **parallelism** and **tries** which allow you to parallelize tasks and retry upon failure.
14 | By default **parallelism** is set to 10 workers.
15 |
16 |
17 | Upstream Example:
18 | ::
19 | from parallel_sync import rsync
20 | creds = {'user': 'myusername', 'key':'~/.ssh/id_rsa', 'host':'192.168.16.31'}
21 | rsync.upload('/tmp/local_dir', '/tmp/remote_dir', creds=creds)
22 |
23 | Downstream Example:
24 | ::
25 | from parallel_sync import rsync
26 | creds = {'user': 'myusername', 'key':'~/.ssh/id_rsa', 'host':'192.168.16.31'}
27 | rsync.download('/tmp/remote_dir', '/tmp/local_dir', creds=creds)
28 |
29 |
30 | File Download Example:
31 | ::
32 | from parallel_sync import wget
33 | urls = ['http://something.png', 'http://something.tar.gz', 'http://something.zip']
34 | wget.download('/tmp', urls, extract=True)
35 |
36 | # download on a remote machine:
37 | creds = {'user': 'myusername', 'key':'~/.ssh/id_rsa', 'host':'192.168.16.31'}
38 | wget.download('/tmp', urls, creds=creds)
39 |
40 | # To untar or unzip compressed files after download:
41 | wget.download('/tmp', urls, creds=creds, extract=True)
42 |
43 |
44 | Example extracting a file on a remote host:
45 | ::
46 | creds = {'user': 'myusername', 'key':'~/.ssh/id_rsa', 'host':'192.168.16.31'}
47 | from parallel_sync import compression
48 | compression.extract('/tmp/x.tar.gz', creds=creds)
49 |
50 |
51 | Example checking that a file exists:
52 | ::
53 | from parallel_sync import executor
54 | if executor.path_exists(path, creds):
55 | print "yes"
56 |
57 |
58 | Example finding files or directories:
59 | ::
60 | from parallel_sync import executor
61 | files = executor.find_files(dir_path, creds, include=['*.png', '*.jpg'])
62 | dirs = executor.find_dirs(dir_path, creds, include=['test'])
63 |
64 | Note that if creds is None, then it will search on localhost
65 |
66 |
67 | Example Running commands:
68 | ::
69 | from parallel_sync import executor
70 | cmds = ['mv /tmp/x /tmp/y', 'touch /tmp/z']
71 | executor.run(cmds, creds=creds, parallelism=len(cmds))
72 | print executor.run('pwd', creds=creds, curr_dir='/tmp')
73 |
74 |
75 | Example using parallel_sync within fabric:
76 | ::
77 | from fabric.api import env
78 | from parallel_sync import rsync
79 | rsync.upload('/tmp/x', '/tmp/y', creds=env)
80 | rsync.download('/tmp/y', '/tmp/z', creds=env)
81 |
82 |
83 | .. _github-link: https://github.com/kouroshparsa/parallel_sync
84 | If you come across any bugs, please report them at
85 | https://github.com/kouroshparsa/parallel_sync
86 |
87 |
--------------------------------------------------------------------------------
/docs/pages/installation.rst:
--------------------------------------------------------------------------------
1 | Installation
2 | ============
3 |
4 | It's as easy as:
5 | ``pip install parallel_sync``
6 |
--------------------------------------------------------------------------------
/docs/pages/requirements.rst:
--------------------------------------------------------------------------------
1 | Requirements
2 | ============
3 |
4 | 1. Python 2.7 (version 3 is not supported yet)
5 | 2. Linux only
6 | 3. To download urls, you need wget installed on the target machine
7 | 4. To send files up or down stream to another host, you need rsync installed on both machines
8 |
9 |
--------------------------------------------------------------------------------
/driver.py:
--------------------------------------------------------------------------------
1 | from parallel_sync import rsync, Credential
# Example driver: build SSH credentials for a host and call rsync.upload
# with a local path, a remote path and a list of exclude patterns.
# NOTE(review): host, port, user and key path are placeholder values.
creds = Credential(username='user',
                   hostname='192.168.168.9',
                   port=3022,
                   key_filename='~/.ssh/id_rsa')
rsync.upload('/tmp/x', '/tmp/y', creds=creds, exclude=['*.pyc', '*.sh'])
--------------------------------------------------------------------------------
/fabfile.py:
--------------------------------------------------------------------------------
1 | from fabric import task
2 | from parallel_sync import rsync, wget, get_fabric_credentials
3 |
@task
def deploy(conn):
    """
    Fabric task that passes two image URLs to wget.download.

    @conn: the fabric connection handed to the task; it is converted to
        parallel_sync credentials with get_fabric_credentials
    NOTE(review): presumably the files end up in /tmp/images on the host
    behind conn - confirm against wget.download.
    """
    creds = get_fabric_credentials(conn)
    urls = ['https://images.unsplash.com/photo-1682695798256-28a674122872?q=80&w=3870&auto=format&fit=crop&ixlib=rb-4.0.3&ixid=M3wxMjA3fDF8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8fA%3D%3D',
            'https://images.unsplash.com/photo-1682687982360-3fbab65f9d50?q=80&w=3870&auto=format&fit=crop&ixlib=rb-4.0.3&ixid=M3wxMjA3fDF8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8fA%3D%3D']
    wget.download(creds, '/tmp/images', urls)
10 |
--------------------------------------------------------------------------------
/parallel_sync/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | This module is the core package module
3 | """
4 | import logging
5 | from dataclasses import dataclass
6 | logging.basicConfig(level='INFO')
7 | logging.getLogger("paramiko").setLevel(logging.WARNING)
8 |
class IllegalArgumentError(ValueError):
    """Signals that a function was given an argument it cannot accept."""
@dataclass
class Credential:
    """
    Connection details for reaching a host over SSH.

    The first three fields are required; port and timeout have defaults.
    """
    # File name of the private key (e.g. '~/.ssh/id_rsa').
    key_filename: str
    # Login name on the target host.
    username: str
    # Host name or IP address of the target host.
    hostname: str
    # Port to connect to.
    port: int = 22
    # Timeout, expressed in seconds.
    timeout: int = 10
18 |
19 |
def get_fabric_credentials(conn) -> Credential:
    """
    Builds a Credential from a fabric connection.
    @conn: fabric connection object of type fabric.connection.Connection
    Returns a Credential object
    Raises IllegalArgumentError if conn is not a fabric Connection,
    KeyError if the connection has no 'key_filename' in its connect_kwargs
    """
    import fabric
    if not isinstance(conn, fabric.connection.Connection):
        raise IllegalArgumentError('Invalid parameter. You are supposed to pass '
                                   'an object of type fabric.connection.Connection')

    key = conn.connect_kwargs['key_filename']
    # key_filename may be a single path or a list of paths; indexing a plain
    # string with [0] would return only its first character.
    if not isinstance(key, str):
        key = key[0]

    return Credential(key_filename=key, username=conn.user,
                      hostname=conn.host, port=conn.port)
34 |
--------------------------------------------------------------------------------
/parallel_sync/compression.py:
--------------------------------------------------------------------------------
1 | """
2 | This module is for handling unziping of archived files
3 | """
4 |
def get_unzip_cmd(path: str):
    """
    @path: str, path or file name of an archive
    returns the command (without the file argument) that extracts that kind
    of archive, or None when the extension is not recognised.
    The extension is matched case-insensitively.
    """
    lowered = path.lower()
    # Order matters: the tar variants must be tested before the bare '.gz'.
    if lowered.endswith(('.tar.gz', '.tgz')):
        return 'tar -zxf'
    if lowered.endswith(('.tar.bz2', '.tbz2')):
        return 'tar -jxf'
    if lowered.endswith('.tar'):
        return 'tar -xf'
    if lowered.endswith('.gz'):
        return 'gunzip'
    if lowered.endswith('.zip'):
        return 'unzip'
    return None
18 |
--------------------------------------------------------------------------------
/parallel_sync/downloader.py:
--------------------------------------------------------------------------------
1 | import os
2 | from multiprocessing.pool import ThreadPool
3 | from functools import partial
4 | from urllib import parse, request
5 |
def __download(folder: str, url: str, extension: str=None):
    """
    Downloads one url into a folder. The file is named after the last
    component of the url path.
    @folder: where to download to
    @url: url to download from
    @extension: if specified, then you'd add this extension to the filename
    """
    filename = os.path.basename(parse.urlsplit(url).path)
    if extension is not None:
        if not extension.startswith('.'):
            extension = f'.{extension}'
        filename = f'{filename}{extension}'

    target = os.path.join(folder, filename)
    chunk_size = 64 * 1024
    with request.urlopen(url) as response, open(target, 'wb') as output:
        # Stream in chunks so large files are never held in memory in full.
        for chunk in iter(lambda: response.read(chunk_size), b''):
            output.write(chunk)
22 |
def download(folder: str, urls: list, extension=None, parallelism: int=10):
    """
    Downloads several urls into a folder using a pool of threads.
    @folder: where to download to
    @urls: list of urls to download
    @extension: if specified, it is appended to every downloaded filename
    @parallelism: how many downloads to run at the same time
    The first failing download re-raises its exception here.
    """
    # The context manager releases the worker threads even when a download fails.
    with ThreadPool(processes=parallelism) as pool:
        pending = [pool.apply_async(__download, (folder, url, extension))
                   for url in urls]
        for result in pending:
            result.get()
31 |
--------------------------------------------------------------------------------
/parallel_sync/executor.py:
--------------------------------------------------------------------------------
1 | """
2 | This is the central module that does common operations
3 | either locally or remotely.
4 | It can do operations in parallel batches as well
5 | """
6 | import signal
7 | import re
8 | import pathlib
9 | import logging
10 | import subprocess
11 | from six import string_types
12 | import paramiko
13 | from . import Credential
14 | logging.basicConfig(level='INFO')
15 |
16 | from queue import Queue
17 |
18 |
def init_worker():
    """ Pool initializer: makes the calling worker ignore SIGINT (Ctrl-C) """
    ignore_handler = signal.SIG_IGN
    signal.signal(signal.SIGINT, ignore_handler)
22 |
23 |
def remote(cmd: str, creds: Credential, curr_dir: str=None):
    """ runs a single command on the remote machine
    @cmd: str, command to run on remote machine
    @creds: ssh credentials
    @curr_dir(optional): the current directory to run the command from;
        it is created on the remote machine first
    returns the standard output of the command as a string
    raises Exception if the command exits with a non-zero status
    """
    def to_text(data):
        # paramiko hands back bytes; decode once so callers get a str
        if isinstance(data, bytes):
            return data.decode('utf-8', errors='replace')
        return data

    client = paramiko.SSHClient()
    client.set_missing_host_key_policy(paramiko.AutoAddPolicy())
    try:
        client.connect(**creds.__dict__)
        if curr_dir is not None:
            make_dirs_remote({curr_dir}, creds)
            cmd = f'cd "{curr_dir}"; {cmd}'

        logging.debug(cmd)
        _, stdout, stderr = client.exec_command(cmd)
        exit_status = stdout.channel.recv_exit_status()
        output = to_text(stdout.read())
        errors = to_text(stderr.read())
    finally:
        # always release the connection, including on the failure path
        client.close()

    if exit_status != 0:
        raise Exception('Failed to download a file\n%s\n%s' % (output, errors))

    return output
47 |
48 |
49 | def run_remote_batch(cmds: list, creds: Credential, curr_dir: str=None, parallelism: int=10):
50 | """ runs commands on the remote machine in parallel
51 | @cmds: list of commands to run in parallel
52 | @creds: ssh credentials
53 | @curr_dir(optional): the currenct directory to run the command from
54 | @parallelism: int - how many commands to run at the same time
55 | """
56 | client = paramiko.SSHClient()
57 | client.set_missing_host_key_policy(paramiko.AutoAddPolicy())
58 | try:
59 | client.connect(**creds.__dict__)
60 | except TimeoutError:
61 | raise Exception(f"Failed to connect to {creds.hostname}. Attempt timed out.")
62 |
63 | ind = 0
64 | while ind list[str]:
115 | """
116 | @include: a wild card pattern to include files or folders, default is '*'
117 | @exclude: list of wild card patterns to exclude files or folders
118 | returns 2 lists of strings which are folder paths and file paths
119 | """
120 | files = []
121 | folders = []
122 | root = pathlib.Path(start_dir)
123 | for path in root.rglob(include):
124 | if path.is_file():
125 | path = path.absolute().as_posix()
126 | if exclude:
127 | for ex in exclude:
128 | if re.match(ex.replace('*', '.*'), path):
129 | continue
130 | files.append(path)
131 | else: # folder:
132 | folders.append(path.absolute().as_posix())
133 | return folders, files
134 |
135 |
def __add_path(path: str, files: list, folders: list):
    """
    Sorts one tagged line into the files or the folders list.
    @path: str, it starts with 'F: ' or 'D: ' to distinguish files from folders
    @files: list of files to add to
    @folders: list of folders to add to
    Lines carrying neither tag (blank or unexpected output) are ignored
    instead of being recorded as folders.
    """
    path = path.strip()
    tag, entry = path[:2], path[2:].strip()
    if tag == 'F:':
        files.append(entry)
    elif tag == 'D:':
        folders.append(entry)
146 |
def find_remote(start_dir: str, creds: Credential, include: str='*', exclude: list=None):
    """
    Lists the files and folders under a directory of the remote machine
    by running `find` over ssh.
    @start_dir: remote directory (or file) to start from
    @creds: ssh credentials
    @include: a wild card pattern applied to file names
    @exclude: list of wild card patterns; an entry is dropped when a pattern
        matches either its full path or its last path component
    returns 2 lists of strings which are folder paths and file paths
    """
    import fnmatch
    import posixpath

    cmd = 'find %s -type f -name "%s" -exec echo "F: {}" \\; -o -type d -exec echo "D: {}" \\;' % (start_dir, include)
    client = paramiko.SSHClient()
    client.set_missing_host_key_policy(paramiko.AutoAddPolicy())
    try:
        client.connect(**creds.__dict__)
        stdout = client.exec_command(cmd)[1]
        output = stdout.read().decode('utf-8')
    finally:
        client.close()

    def is_excluded(line):
        # Match on the real path, not on the 'F: ' / 'D: ' tagged line.
        if not exclude:
            return False
        remote_path = line[2:].strip()
        name = posixpath.basename(remote_path)
        return any(fnmatch.fnmatchcase(remote_path, pattern)
                   or fnmatch.fnmatchcase(name, pattern)
                   for pattern in exclude)

    files = []
    folders = []
    for line in set(output.splitlines()):  # set() drops duplicated lines
        line = line.strip()
        if line and not is_excluded(line):
            __add_path(line, files, folders)
    return folders, files
172 |
--------------------------------------------------------------------------------
/parallel_sync/rsync.py:
--------------------------------------------------------------------------------
1 | """
2 | This module copies files in parallel up or down stream
3 | from or to a remote host
4 | """
5 | import os
6 | import re
7 | import hashlib
8 | import platform
9 | import subprocess
10 | from multiprocessing.pool import ThreadPool
11 | from functools import partial
12 | import logging
13 | from . import Credential, executor
14 | logging.basicConfig(level='INFO')
15 |
16 |
def upload(src: str, dst: str, creds: Credential,
           tries: int=1, include: str='*', exclude: list=None,
           parallelism: int=10, extract: bool=False,
           validate: bool=False, additional_params: str='-c'):
    """
    Copies a local file or folder to the remote machine.
    @src, @dst: source and destination directories
    @creds: ssh credentials
    @tries: int, how many times to try
    @include: wild card pattern
    @exclude: list of wild card patterns
    @parallelism: number of parallel transfers
    @extract: bool - whether to extract archives after the transfer
    @validate: bool - if True, it will perform a checksum comparison after the operation
    @additional_params: str - additional parameters to pass on to rsync
    """
    __transfer(src, dst, creds, upstream=True,
               tries=tries, include=include, exclude=exclude,
               parallelism=parallelism, extract=extract,
               validate=validate, additional_params=additional_params)
30 |
31 |
def download(src: str, dst: str, creds: Credential,
             tries: int=1, include: str='*', exclude: list=None,
             parallelism: int=10, extract: bool=False,
             validate: bool=False, additional_params: str='-c'):
    """
    Copies a remote file or folder to the local machine.
    @src, @dst: source and destination directories
    @creds: ssh credentials
    @validate: bool - if True, it will perform a checksum comparison after the operation
    @additional_params: str - additional parameters to pass on to rsync
    """
    options = {
        'tries': tries,
        'include': include,
        'exclude': exclude,
        'parallelism': parallelism,
        'extract': extract,
        'validate': validate,
        'additional_params': additional_params,
    }
    __transfer(src, dst, creds, upstream=False, **options)
45 |
46 |
def __transfer(src: str, dst: str, creds: Credential, upstream: bool=True,
               tries: int=1, include: str='*', exclude: list=None, parallelism: int=10, extract: bool=False,
               validate: bool=False, additional_params: str='-c'):
    """
    Collects what has to be copied, creates the destination folders and
    hands the (source, destination) pairs over to __transfer_paths.
    @src: str path of a file or folder for source
    @dst: path of a file or folder for destination
    @creds: ssh credentials
    @upstream: bool, whether it is upload or not (False means download)
    @tries: int, how many times to try
    @include: wild card pattern
    @exclude: list of wild card patterns
    @parallelism(default=10): number of parallel processes to use
    @extract: bool - whether to extract tar or zip files after transfer
    @validate: whether to do a checksum validation at the end
    @additional_params: str - additional parameters to pass on to rsync
    raises ValueError if src or dst is None
    """
    if src is None:
        raise ValueError('src cannot be None')

    if dst is None:
        raise ValueError('dst cannot be None')

    if upstream and os.path.isfile(src):
        # Single-file upload: there are no sub-folders to mirror. Leaving
        # folder_srcs unbound here used to raise NameError further down.
        folder_srcs, srcs = [], [src]
    elif upstream:  # upload
        folder_srcs, srcs = executor.find_local(src, include=include, exclude=exclude)
    else:  # download
        folder_srcs, srcs = executor.find_remote(src, creds, include=include, exclude=exclude)

    # Every discovered sub-folder plus the destination root itself.
    folder_dsts = {__get_dst_path(src, s, dst) for s in folder_srcs if s != src}
    folder_dsts.add(dst)
    __make_dirs(folder_dsts, creds, upstream)

    if len(srcs) < 1:
        logging.warning('No source files found to transfer.')
        return

    paths = [(s_path, __get_dst_path(src, s_path, dst)) for s_path in srcs]

    __transfer_paths(paths, creds, upstream,
                     tries=tries, parallelism=parallelism, extract=extract,
                     validate=validate, additional_params=additional_params)
92 |
def __get_dst_path(src: str, src_path: str, dst_dir: str):
    """
    Maps a source path onto the destination folder.
    @src: str, the root of source directory to copy from
    @src_path: str, the full path of file or folder to copy
    @dst_dir: str, the destination folder
    returns the destination full file path
    Example: src=C:/temp/testdir
        src_path=C:/temp/testdir/emptydir
        dst_dir=/tmp/testdir
        returns /tmp/testdir/emptydir
    """
    # The part of src_path that lies below the source root.
    relative = src_path[len(src):]
    if not relative:
        # src_path equals src, so src is itself the file: keep its name only
        relative = src.replace('\\', '/').rsplit('/', 1)[-1]

    if relative[:1] in ('/', '\\'):
        relative = relative[1:]

    base = dst_dir[:-1] if dst_dir.endswith('/') else dst_dir
    return '{}/{}'.format(base, relative)
115 |
116 |
def __make_dirs(folders: set, creds: Credential, upstream: bool):
    """
    Creates the destination directories of a transfer.
    @folders: set of folder paths
    @creds: ssh credentials
    @upstream: bool, whether to upload or download
    For an upload the folders are created on the remote machine,
    for a download they are created locally.
    """
    if not upstream:
        for target in folders:
            os.makedirs(target, exist_ok=True)
        return

    executor.make_dirs_remote(folders, creds=creds)
129 |
130 |
def __is_rsync_installed():
    """
    returns bool, whether rsync is installed on the local machine or not
    (always False on Windows)
    """
    import shutil

    if 'Windows' in platform.system():
        return False

    # shutil.which searches PATH in-process: no shell is spawned and nothing
    # is printed to stdout, unlike running `which rsync`.
    return shutil.which('rsync') is not None
140 |
141 |
def __get_transfer_commands(creds: Credential, upstream: bool,
                            paths: list, additional_params: str='-c') -> list:
    """
    Builds one shell command per (source, destination) pair. rsync is used
    when it is available locally, otherwise scp. A local directory source of
    an upload becomes an `ssh ... mkdir -p` command.
    @paths: list of tuples of (source_path, dest_path)
        note that source_path can be either local or remote
    @creds: ssh Credentials
    @upstream: bool whether it is upload or download
    @additional_params: str. You can pass additional rsync parameters. The default is just '-c'
    returns a list of commands to be run locally
    """
    # NOTE(review): the two `-o ...` options sit outside the quoted `-e`
    # command, so they appear to be handed to rsync rather than to ssh, and
    # `--port` looks like rsync's daemon port rather than the ssh port. The
    # download command does not pass the port at all. tests/test_rsync.py
    # asserts these exact strings, so they are kept unchanged here - confirm
    # and fix together with the tests.
    rsync = (f"rsync {additional_params} -e 'ssh -i {creds.key_filename}' "
             "-o StrictHostKeyChecking=no -o ServerAliveInterval=100")
    host = f'{creds.username}@{creds.hostname}'
    scp = f'scp -P {creds.port} -i "{creds.key_filename}"'

    use_rsync = None  # probed lazily and only once, not once per path
    cmds = []
    for src, dst in paths:
        if upstream and os.path.isdir(src):
            cmd = f'ssh -p {creds.port} {host} -i "{creds.key_filename}" mkdir -p {dst}'
        else:
            if use_rsync is None:
                use_rsync = __is_rsync_installed()

            if use_rsync:
                if upstream:
                    cmd = f'{rsync} "{src}" {host}:"{dst}" --port {creds.port}'
                else:  # download:
                    cmd = f'{rsync} {host}:"{src}" "{dst}"'
            else:  # then use scp:
                if upstream:
                    cmd = f'{scp} "{src}" {host}:"{dst}"'
                else:  # download:
                    cmd = f'{scp} {host}:"{src}" "{dst}"'

        cmds.append(cmd)
    return cmds
175 |
176 |
177 |
def __transfer_paths(paths: list, creds: Credential, upstream: bool=True, tries: int=1,
                     parallelism: int=10, extract: bool=False, validate: bool=False, additional_params: str='-c'):
    """
    Runs the transfer commands for the given paths in a thread pool, then
    optionally validates checksums and extracts the transferred files.
    @paths: list of tuples of (source_path, dest_path)
        note that source_path can be either local or remote
    @creds: ssh Credentials
    @upstream: bool whether it is upload or download
    @tries: int. How many times to try to transfer the file.
        Default is 1. You can specify more than 1 to retry.
    @parallelism: int. How many threads to use for the file transfer
    @extract: bool, whether after transferring the file it needs to be extracted
    @validate: bool, whether you want to do a checksum validation after the transfer
    @additional_params: str. You can pass additional rsync parameters. The default is just '-c'
    raises ValueError if paths is empty, Exception if the host is not specified
    """
    if len(paths) < 1:
        raise ValueError('You did not specify any paths')

    if creds.hostname in ['', None]:
        raise Exception('The host is not specified.')

    cmds = __get_transfer_commands(creds, upstream, paths, additional_params)
    pool = ThreadPool(processes=parallelism)
    try:
        pool.map(partial(executor.local, tries=tries), cmds)
    finally:
        # release the worker threads even when a transfer command raises
        pool.close()
        pool.join()

    if validate:
        validate_checksums(creds, upstream, parallelism, paths)

    if extract:
        extract_files(creds, upstream, paths)
212 |
213 |
def extract_files(creds, upstream, paths):
    """
    Runs gunzip on every transferred '.gz' file at its destination.
    :param creds: ssh credentials
    :param upstream: boolean, True for upload (destination is remote),
        False for download (destination is local)
    :param paths: list of tuples of (source_path, dest_path)
    """
    logging.info('File extraction...')
    # Only the destination half of each pair matters here.
    cmds = [f'gunzip "{dest}"' for _, dest in paths if dest.endswith('.gz')]
    if not cmds:
        return

    # NOTE(review): remote_batch / local_batch are not visible in the part of
    # executor reviewed here (it defines run_remote_batch) - confirm they exist.
    if upstream:  # local=source, remote=dest
        executor.remote_batch(cmds, creds)
    else:  # local=dest, remote=source
        executor.local_batch(cmds)
236 |
237 |
def validate_checksums(creds, upstream, parallelism, paths):
    """
    Compares local and remote checksums of every transferred file in parallel.
    :param creds: ssh credentials
    :param upstream: boolean, True for upload, False for download
    :param parallelism: number of threads to use
    :param paths: list of tuples of (source_path, dest_path)
    if fails, it raises an Exception
    """
    logging.info('Checksum validation...')
    # checksum_validator expects (local_path, remote_path) pairs
    if upstream:  # local=source, remote=dest
        local_remote = [(src, dst) for src, dst in paths]
    else:  # local=dest, remote=source
        local_remote = [(dst, src) for src, dst in paths]

    check_pair = partial(checksum_validator, creds)
    workers = ThreadPool(processes=parallelism)
    workers.map(check_pair, local_remote)
    workers.close()
    workers.join()
259 |
260 |
def checksum_validator(creds, paths):
    """
    Compares the md5sum of a local file with the md5sum of a remote file.
    :param creds: ssh credentials
    :param paths: a pair of paths: local path and remote path
    if the checksums differ (or cannot be read), it raises CheckSumMismatch,
    which is a subclass of Exception
    """
    local_path, remote_path = paths

    def first_field(output):
        # md5sum prints '<checksum>  <file>'; the output may arrive as bytes
        if isinstance(output, bytes):
            output = output.decode('utf-8', errors='replace')
        fields = output.split()
        return fields[0] if fields else ''

    checksum1 = first_field(executor.local(f'md5sum "{local_path}"'))
    checksum2 = first_field(executor.remote(f'md5sum "{remote_path}"', creds))
    if not checksum1 or checksum1 != checksum2:
        # `paths` is a tuple, so it must not be used directly as a % argument
        raise CheckSumMismatch(f'checksum mismatch for {paths}')
    logging.info('Verified: filename=%s checksum=%s', os.path.basename(local_path), checksum1)
273 |
class CheckSumMismatch(Exception):
    """Raised when the checksums of a source and a destination file differ."""
276 |
def local_checksum_validator(paths: list):
    """
    Compares the md5 checksum of each source file with its destination file;
    both files are read from the local machine.
    @paths: list of tuples of (source_path, dest_path)
    raises CheckSumMismatch on the first pair whose checksums differ
    """
    def md5_of(path):
        # Hash in chunks and close the handle: no leaked file descriptors
        # and no whole-file reads into memory.
        digest = hashlib.md5()
        with open(path, 'rb') as handle:
            for chunk in iter(lambda: handle.read(1024 * 1024), b''):
                digest.update(chunk)
        return digest.hexdigest()

    for src, dst in paths:
        if md5_of(src) != md5_of(dst):
            raise CheckSumMismatch(f'checksum mismatch for\n{src}\n{dst}')
286 |
--------------------------------------------------------------------------------
/parallel_sync/wget.py:
--------------------------------------------------------------------------------
1 | """
2 | This module manages file operations such as parallel download
3 | """
4 | import os
5 | from . import executor, compression, Credential
6 | TIMEOUT = 40
7 |
8 |
def __url_to_filename(url: str):
    """
    retrieves the filename from the url: the last component of the url
    path, without any query string or fragment
    """
    from urllib.parse import urlsplit

    # basename() on the raw url would keep '?q=...' as part of the file name
    return os.path.basename(urlsplit(url.strip()).path)
15 |
16 |
def download(creds: Credential, target_dir: str, urls: list,
             filenames: list=None, parallelism: int=10, tries: int=3,
             extract: bool=False, timeout: int=TIMEOUT):
    """ downloads large files on a remote machine
    @creds: ssh credentials
    @target_dir: where to download to
    @urls: a list of urls or a single url
    @filenames: list of filenames. If used, the urls will be downloaded to
        those file names; otherwise the names are derived from the urls
    @parallelism(default=10): number of parallel processes to use
    @tries: how many times wget tries each url (wget -t)
    @extract: boolean - whether to extract tar or zip files after download
    @timeout: wget network timeout in seconds (wget -T)
    raises ValueError if urls is not a list or a string, or if the number of
    filenames differs from the number of urls
    """
    if isinstance(urls, str):
        urls = [urls]

    if not isinstance(urls, list):
        raise ValueError(f'Expected a list of urls. Received {urls}')

    # NOTE(review): this creates target_dir on the LOCAL machine although the
    # wget commands run remotely - confirm whether it is intended.
    if not os.path.exists(target_dir):
        os.makedirs(target_dir)

    if filenames is None:
        filenames = [__url_to_filename(url) for url in urls]
    elif len(filenames) != len(urls):
        raise ValueError('You have specified filenames but the number '
                         'of filenames does not match the number of urls')

    cmds = []
    for filename, _url in zip(filenames, urls):
        file_path = f'{target_dir}/{filename}'
        cmd = f'wget -O "{file_path}" -t {tries} -T {timeout} "{_url}"'
        # note: don't use the -q option because
        # if it fails, you don't get any message or return code
        if extract:
            ext = compression.get_unzip_cmd(file_path)
            if ext is not None:
                cmd = f'{cmd};cd "{target_dir}";{ext} "{filename}"'
        cmds.append(cmd)

    executor.run_remote_batch(cmds, creds, curr_dir=target_dir, parallelism=parallelism)
57 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | pytest
2 | paramiko
3 | six
--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
1 | [aliases]
2 | release = egg_info -RDb ''
3 |
4 | [metadata]
5 | description-file = README.md
6 |
7 | [bdist_wheel]
8 | universal=1
9 |
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | """
2 | **parallel_sync**
3 |
4 |
5 | parallel_sync is a python package for uploading or downloading files using multiprocessing and md5 checks on Linux.
6 | The files can be transferred from a remote linux host or a url.
7 |
8 |
9 | Link
10 | `````
11 |
12 | * Source
13 | https://github.com/kouroshparsa/parallel_sync
14 |
15 | """
from setuptools import setup, find_packages
import os
import sysconfig

# Warn early when the Python headers are missing. The stdlib sysconfig module
# replaces the deprecated distutils.sysconfig, and get_config_var returns None
# instead of raising when INCLUDEPY is not defined.
inc_path = sysconfig.get_config_var('INCLUDEPY')
if not inc_path or not os.path.exists(os.path.join(inc_path, 'Python.h')):
    print('WARNING: You must install python headers to install the Paramiko dependency.'
          '\nExample on ubuntu: sudo apt-get install python3-dev'
          '\nExample on centos: sudo yum install python3-devel')

version = '2.0.1'
setup(
    name='parallel_sync',
    version=version,
    url='https://github.com/kouroshparsa/parallel_sync',
    download_url='https://github.com/kouroshparsa/parallel_sync/packages/%s' % version,
    license='GNU',
    author='Kourosh Parsa',
    author_email="kouroshtheking@gmail.com",
    description='A Parallelized file/url syncing package',
    long_description=__doc__,
    packages=find_packages(),
    install_requires=['paramiko>=1.15.2', 'six'],
    python_requires='>=3',
    include_package_data=True,
    zip_safe=False,
    platforms='Linux',
    classifiers=[
        'Operating System :: Unix',
        'Programming Language :: Python'
    ]
)
48 |
--------------------------------------------------------------------------------
/smoke_tests/rsync.py:
--------------------------------------------------------------------------------
1 | """
2 | This file has smoke tests. Here is how you can run them:
3 | python ./smoke_tests/rsync.py
4 | """
5 | from parallel_sync import rsync, Credential
6 | import unittest
7 |
class TestStringMethods(unittest.TestCase):
    """Smoke tests that upload and download a file and a folder over ssh."""

    KEY_FILE = 'C:/kourosh/virtualbox_ssh_key/id_rsa'

    def setUp(self):
        # every test talks to the same host with the same credentials
        self.creds = Credential(username='kourosh', hostname='localhost',
                                port=3022, key_filename=self.KEY_FILE)

    def test_upload_file(self):
        rsync.upload('c:/temp/test.txt', '/tmp/', creds=self.creds)

    def test_upload_dir(self):
        rsync.upload('c:/temp/testdir', '/tmp/testdir', creds=self.creds)

    def test_download_file(self):
        rsync.download('/tmp/test.txt', 'C:/temp/x/', creds=self.creds)

    def test_download_dir(self):
        rsync.download('/tmp/testdir', 'c:/temp/z', creds=self.creds)
25 |
26 | if __name__ == '__main__':
27 | unittest.main()
--------------------------------------------------------------------------------
/tests/test_rsync.py:
--------------------------------------------------------------------------------
1 | """
2 | This file has the unittests, to run use this command:
3 | pytest
4 | """
5 | from parallel_sync import rsync, Credential
6 | import pytest
7 | from unittest.mock import patch
8 |
def test_upload_null_params():
    """upload must reject a missing source/destination with a ValueError."""
    # ValueError rather than Exception, so an unrelated failure such as a
    # NameError or TypeError cannot make this test pass by accident.
    with pytest.raises(ValueError):
        rsync.upload(None, None, creds=None)
12 |
13 |
def test_get_dst_path():
    """__get_dst_path maps a source path below its root onto the destination."""
    cases = [
        # (src root, src path, dst dir, expected)
        ('/x', '/x/a', '/tmp', '/tmp/a'),
        ('/x', '/x/a/b', '/tmp', '/tmp/a/b'),
        ('/x/filename', '/x/filename', '/tmp', '/tmp/filename'),
        ('C:\\x\\filename', 'C:\\x\\filename', '/tmp', '/tmp/filename'),
    ]
    for root, full_path, dst_dir, expected in cases:
        assert rsync.__get_dst_path(root, full_path, dst_dir) == expected
19 |
class MockStdOut:
    """Stand-in for the stdout/stderr objects returned by exec_command."""

    class Channel:
        """Channel whose command always finished successfully."""

        def recv_exit_status(self):
            return 0

    # shared by all instances, like in the original mock
    channel = Channel()

    def read(self):
        return ''
27 |
@patch('parallel_sync.executor.find_local')
@patch('paramiko.SSHClient.connect')
@patch('paramiko.SSHClient.exec_command')
@patch('parallel_sync.rsync.__get_transfer_commands')
def test_upload(mock_tr_cmd, mock_exec_command, mock_connect, mock_find_local):
    """upload of a folder must end up asking for the transfer commands."""
    creds = Credential(username='u', hostname='h', port=3022, key_filename='k')
    # find_local returns (folders, files). A flat two-item list would be
    # unpacked into two strings and then iterated character by character.
    mock_find_local.return_value = ([], ['/src_dir/a', '/src_dir/b'])
    mock_connect.return_value = None
    buffer = MockStdOut()
    mock_exec_command.return_value = [None, buffer, buffer]
    mock_tr_cmd.return_value = []
    rsync.upload('/src_dir', '/dst_dir', creds=creds)
    assert mock_tr_cmd.called
41 |
@patch('parallel_sync.rsync.__is_rsync_installed')
def test_get_transfer_commands_rsync_upstream(mock_is_rsync_installed):
    """Upload commands when rsync is available locally."""
    mock_is_rsync_installed.return_value = True
    creds = Credential(key_filename='k', username='u', hostname='h', port=3022)
    # each pair is (source path, destination path)
    pairs = [('/src/1', '/dst/1'), ('/src/2', '/dst/2')]
    expected = [
        'rsync -c -e \'ssh -i k\' -o StrictHostKeyChecking=no -o ServerAliveInterval=100 "/src/1" u@h:"/dst/1" --port 3022',
        'rsync -c -e \'ssh -i k\' -o StrictHostKeyChecking=no -o ServerAliveInterval=100 "/src/2" u@h:"/dst/2" --port 3022',
    ]
    assert rsync.__get_transfer_commands(creds, True, pairs) == expected
51 |
@patch('parallel_sync.rsync.__is_rsync_installed')
def test_get_transfer_commands_rsync_downstream(mock_is_rsync_installed):
    """Download commands when rsync is available locally."""
    mock_is_rsync_installed.return_value = True
    creds = Credential(key_filename='k', username='u', hostname='h', port=3022)
    # each pair is (source path, destination path)
    pairs = [('/src/1', '/dst/1'), ('/src/2', '/dst/2')]
    expected = [
        'rsync -c -e \'ssh -i k\' -o StrictHostKeyChecking=no -o ServerAliveInterval=100 u@h:"/src/1" "/dst/1"',
        'rsync -c -e \'ssh -i k\' -o StrictHostKeyChecking=no -o ServerAliveInterval=100 u@h:"/src/2" "/dst/2"',
    ]
    assert rsync.__get_transfer_commands(creds, False, pairs) == expected
61 |
@patch('parallel_sync.rsync.__is_rsync_installed')
def test_get_transfer_commands_scp_upstream(mock_is_rsync_installed):
    """Upload commands fall back to scp when rsync is not available."""
    mock_is_rsync_installed.return_value = False
    creds = Credential(key_filename='k', username='u', hostname='h', port=3022)
    # each pair is (source path, destination path)
    pairs = [('/src/1', '/dst/1'), ('/src/2', '/dst/2')]
    expected = [
        'scp -P 3022 -i "k" "/src/1" u@h:"/dst/1"',
        'scp -P 3022 -i "k" "/src/2" u@h:"/dst/2"',
    ]
    assert rsync.__get_transfer_commands(creds, True, pairs) == expected
71 |
@patch('parallel_sync.rsync.__is_rsync_installed')
def test_get_transfer_commands_scp_downstream(mock_is_rsync_installed):
    """Download commands fall back to scp when rsync is not available."""
    mock_is_rsync_installed.return_value = False
    creds = Credential(key_filename='k', username='u', hostname='h', port=3022)
    # each pair is (source path, destination path)
    pairs = [('/src/1', '/dst/1'), ('/src/2', '/dst/2')]
    expected = [
        'scp -P 3022 -i "k" u@h:"/src/1" "/dst/1"',
        'scp -P 3022 -i "k" u@h:"/src/2" "/dst/2"',
    ]
    assert rsync.__get_transfer_commands(creds, False, pairs) == expected
81 |
--------------------------------------------------------------------------------