├── .gitconfig ├── .gitignore ├── LICENSE ├── README.md ├── V1_Doc.md ├── docs ├── Makefile ├── conf.py ├── index.rst ├── make.bat └── pages │ ├── examples.rst │ ├── installation.rst │ └── requirements.rst ├── driver.py ├── fabfile.py ├── parallel_sync ├── __init__.py ├── compression.py ├── downloader.py ├── executor.py ├── rsync.py └── wget.py ├── requirements.txt ├── setup.cfg ├── setup.py ├── smoke_tests └── rsync.py └── tests └── test_rsync.py /.gitconfig: -------------------------------------------------------------------------------- 1 | [user] 2 | email = kouroshtheking@gmail.com 3 | name = kouroshparsa 4 | 5 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | 5 | # C extensions 6 | *.so 7 | 8 | # Distribution / packaging 9 | .Python 10 | env/ 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | *.egg-info/ 23 | .installed.cfg 24 | *.egg 25 | 26 | # PyInstaller 27 | # Usually these files are written by a python script from a template 28 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
29 | *.manifest 30 | *.spec 31 | 32 | # Installer logs 33 | pip-log.txt 34 | pip-delete-this-directory.txt 35 | 36 | # Unit test / coverage reports 37 | htmlcov/ 38 | .tox/ 39 | .coverage 40 | .coverage.* 41 | .cache 42 | nosetests.xml 43 | coverage.xml 44 | *,cover 45 | 46 | # Translations 47 | *.mo 48 | *.pot 49 | 50 | # Django stuff: 51 | *.log 52 | 53 | # Sphinx documentation 54 | docs/_build/ 55 | 56 | # PyBuilder 57 | target/ 58 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | GNU GENERAL PUBLIC LICENSE 2 | Version 2, June 1991 3 | 4 | Copyright (C) 1989, 1991 Free Software Foundation, Inc., 5 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 6 | Everyone is permitted to copy and distribute verbatim copies 7 | of this license document, but changing it is not allowed. 8 | 9 | Preamble 10 | 11 | The licenses for most software are designed to take away your 12 | freedom to share and change it. By contrast, the GNU General Public 13 | License is intended to guarantee your freedom to share and change free 14 | software--to make sure the software is free for all its users. This 15 | General Public License applies to most of the Free Software 16 | Foundation's software and to any other program whose authors commit to 17 | using it. (Some other Free Software Foundation software is covered by 18 | the GNU Lesser General Public License instead.) You can apply it to 19 | your programs, too. 20 | 21 | When we speak of free software, we are referring to freedom, not 22 | price. Our General Public Licenses are designed to make sure that you 23 | have the freedom to distribute copies of free software (and charge for 24 | this service if you wish), that you receive source code or can get it 25 | if you want it, that you can change the software or use pieces of it 26 | in new free programs; and that you know you can do these things. 
27 | 28 | To protect your rights, we need to make restrictions that forbid 29 | anyone to deny you these rights or to ask you to surrender the rights. 30 | These restrictions translate to certain responsibilities for you if you 31 | distribute copies of the software, or if you modify it. 32 | 33 | For example, if you distribute copies of such a program, whether 34 | gratis or for a fee, you must give the recipients all the rights that 35 | you have. You must make sure that they, too, receive or can get the 36 | source code. And you must show them these terms so they know their 37 | rights. 38 | 39 | We protect your rights with two steps: (1) copyright the software, and 40 | (2) offer you this license which gives you legal permission to copy, 41 | distribute and/or modify the software. 42 | 43 | Also, for each author's protection and ours, we want to make certain 44 | that everyone understands that there is no warranty for this free 45 | software. If the software is modified by someone else and passed on, we 46 | want its recipients to know that what they have is not the original, so 47 | that any problems introduced by others will not reflect on the original 48 | authors' reputations. 49 | 50 | Finally, any free program is threatened constantly by software 51 | patents. We wish to avoid the danger that redistributors of a free 52 | program will individually obtain patent licenses, in effect making the 53 | program proprietary. To prevent this, we have made it clear that any 54 | patent must be licensed for everyone's free use or not licensed at all. 55 | 56 | The precise terms and conditions for copying, distribution and 57 | modification follow. 58 | 59 | GNU GENERAL PUBLIC LICENSE 60 | TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION 61 | 62 | 0. This License applies to any program or other work which contains 63 | a notice placed by the copyright holder saying it may be distributed 64 | under the terms of this General Public License. 
The "Program", below, 65 | refers to any such program or work, and a "work based on the Program" 66 | means either the Program or any derivative work under copyright law: 67 | that is to say, a work containing the Program or a portion of it, 68 | either verbatim or with modifications and/or translated into another 69 | language. (Hereinafter, translation is included without limitation in 70 | the term "modification".) Each licensee is addressed as "you". 71 | 72 | Activities other than copying, distribution and modification are not 73 | covered by this License; they are outside its scope. The act of 74 | running the Program is not restricted, and the output from the Program 75 | is covered only if its contents constitute a work based on the 76 | Program (independent of having been made by running the Program). 77 | Whether that is true depends on what the Program does. 78 | 79 | 1. You may copy and distribute verbatim copies of the Program's 80 | source code as you receive it, in any medium, provided that you 81 | conspicuously and appropriately publish on each copy an appropriate 82 | copyright notice and disclaimer of warranty; keep intact all the 83 | notices that refer to this License and to the absence of any warranty; 84 | and give any other recipients of the Program a copy of this License 85 | along with the Program. 86 | 87 | You may charge a fee for the physical act of transferring a copy, and 88 | you may at your option offer warranty protection in exchange for a fee. 89 | 90 | 2. You may modify your copy or copies of the Program or any portion 91 | of it, thus forming a work based on the Program, and copy and 92 | distribute such modifications or work under the terms of Section 1 93 | above, provided that you also meet all of these conditions: 94 | 95 | a) You must cause the modified files to carry prominent notices 96 | stating that you changed the files and the date of any change. 
97 | 98 | b) You must cause any work that you distribute or publish, that in 99 | whole or in part contains or is derived from the Program or any 100 | part thereof, to be licensed as a whole at no charge to all third 101 | parties under the terms of this License. 102 | 103 | c) If the modified program normally reads commands interactively 104 | when run, you must cause it, when started running for such 105 | interactive use in the most ordinary way, to print or display an 106 | announcement including an appropriate copyright notice and a 107 | notice that there is no warranty (or else, saying that you provide 108 | a warranty) and that users may redistribute the program under 109 | these conditions, and telling the user how to view a copy of this 110 | License. (Exception: if the Program itself is interactive but 111 | does not normally print such an announcement, your work based on 112 | the Program is not required to print an announcement.) 113 | 114 | These requirements apply to the modified work as a whole. If 115 | identifiable sections of that work are not derived from the Program, 116 | and can be reasonably considered independent and separate works in 117 | themselves, then this License, and its terms, do not apply to those 118 | sections when you distribute them as separate works. But when you 119 | distribute the same sections as part of a whole which is a work based 120 | on the Program, the distribution of the whole must be on the terms of 121 | this License, whose permissions for other licensees extend to the 122 | entire whole, and thus to each and every part regardless of who wrote it. 123 | 124 | Thus, it is not the intent of this section to claim rights or contest 125 | your rights to work written entirely by you; rather, the intent is to 126 | exercise the right to control the distribution of derivative or 127 | collective works based on the Program. 
128 | 129 | In addition, mere aggregation of another work not based on the Program 130 | with the Program (or with a work based on the Program) on a volume of 131 | a storage or distribution medium does not bring the other work under 132 | the scope of this License. 133 | 134 | 3. You may copy and distribute the Program (or a work based on it, 135 | under Section 2) in object code or executable form under the terms of 136 | Sections 1 and 2 above provided that you also do one of the following: 137 | 138 | a) Accompany it with the complete corresponding machine-readable 139 | source code, which must be distributed under the terms of Sections 140 | 1 and 2 above on a medium customarily used for software interchange; or, 141 | 142 | b) Accompany it with a written offer, valid for at least three 143 | years, to give any third party, for a charge no more than your 144 | cost of physically performing source distribution, a complete 145 | machine-readable copy of the corresponding source code, to be 146 | distributed under the terms of Sections 1 and 2 above on a medium 147 | customarily used for software interchange; or, 148 | 149 | c) Accompany it with the information you received as to the offer 150 | to distribute corresponding source code. (This alternative is 151 | allowed only for noncommercial distribution and only if you 152 | received the program in object code or executable form with such 153 | an offer, in accord with Subsection b above.) 154 | 155 | The source code for a work means the preferred form of the work for 156 | making modifications to it. For an executable work, complete source 157 | code means all the source code for all modules it contains, plus any 158 | associated interface definition files, plus the scripts used to 159 | control compilation and installation of the executable. 
However, as a 160 | special exception, the source code distributed need not include 161 | anything that is normally distributed (in either source or binary 162 | form) with the major components (compiler, kernel, and so on) of the 163 | operating system on which the executable runs, unless that component 164 | itself accompanies the executable. 165 | 166 | If distribution of executable or object code is made by offering 167 | access to copy from a designated place, then offering equivalent 168 | access to copy the source code from the same place counts as 169 | distribution of the source code, even though third parties are not 170 | compelled to copy the source along with the object code. 171 | 172 | 4. You may not copy, modify, sublicense, or distribute the Program 173 | except as expressly provided under this License. Any attempt 174 | otherwise to copy, modify, sublicense or distribute the Program is 175 | void, and will automatically terminate your rights under this License. 176 | However, parties who have received copies, or rights, from you under 177 | this License will not have their licenses terminated so long as such 178 | parties remain in full compliance. 179 | 180 | 5. You are not required to accept this License, since you have not 181 | signed it. However, nothing else grants you permission to modify or 182 | distribute the Program or its derivative works. These actions are 183 | prohibited by law if you do not accept this License. Therefore, by 184 | modifying or distributing the Program (or any work based on the 185 | Program), you indicate your acceptance of this License to do so, and 186 | all its terms and conditions for copying, distributing or modifying 187 | the Program or works based on it. 188 | 189 | 6. 
Each time you redistribute the Program (or any work based on the 190 | Program), the recipient automatically receives a license from the 191 | original licensor to copy, distribute or modify the Program subject to 192 | these terms and conditions. You may not impose any further 193 | restrictions on the recipients' exercise of the rights granted herein. 194 | You are not responsible for enforcing compliance by third parties to 195 | this License. 196 | 197 | 7. If, as a consequence of a court judgment or allegation of patent 198 | infringement or for any other reason (not limited to patent issues), 199 | conditions are imposed on you (whether by court order, agreement or 200 | otherwise) that contradict the conditions of this License, they do not 201 | excuse you from the conditions of this License. If you cannot 202 | distribute so as to satisfy simultaneously your obligations under this 203 | License and any other pertinent obligations, then as a consequence you 204 | may not distribute the Program at all. For example, if a patent 205 | license would not permit royalty-free redistribution of the Program by 206 | all those who receive copies directly or indirectly through you, then 207 | the only way you could satisfy both it and this License would be to 208 | refrain entirely from distribution of the Program. 209 | 210 | If any portion of this section is held invalid or unenforceable under 211 | any particular circumstance, the balance of the section is intended to 212 | apply and the section as a whole is intended to apply in other 213 | circumstances. 214 | 215 | It is not the purpose of this section to induce you to infringe any 216 | patents or other property right claims or to contest validity of any 217 | such claims; this section has the sole purpose of protecting the 218 | integrity of the free software distribution system, which is 219 | implemented by public license practices. 
Many people have made 220 | generous contributions to the wide range of software distributed 221 | through that system in reliance on consistent application of that 222 | system; it is up to the author/donor to decide if he or she is willing 223 | to distribute software through any other system and a licensee cannot 224 | impose that choice. 225 | 226 | This section is intended to make thoroughly clear what is believed to 227 | be a consequence of the rest of this License. 228 | 229 | 8. If the distribution and/or use of the Program is restricted in 230 | certain countries either by patents or by copyrighted interfaces, the 231 | original copyright holder who places the Program under this License 232 | may add an explicit geographical distribution limitation excluding 233 | those countries, so that distribution is permitted only in or among 234 | countries not thus excluded. In such case, this License incorporates 235 | the limitation as if written in the body of this License. 236 | 237 | 9. The Free Software Foundation may publish revised and/or new versions 238 | of the General Public License from time to time. Such new versions will 239 | be similar in spirit to the present version, but may differ in detail to 240 | address new problems or concerns. 241 | 242 | Each version is given a distinguishing version number. If the Program 243 | specifies a version number of this License which applies to it and "any 244 | later version", you have the option of following the terms and conditions 245 | either of that version or of any later version published by the Free 246 | Software Foundation. If the Program does not specify a version number of 247 | this License, you may choose any version ever published by the Free Software 248 | Foundation. 249 | 250 | 10. If you wish to incorporate parts of the Program into other free 251 | programs whose distribution conditions are different, write to the author 252 | to ask for permission. 
For software which is copyrighted by the Free 253 | Software Foundation, write to the Free Software Foundation; we sometimes 254 | make exceptions for this. Our decision will be guided by the two goals 255 | of preserving the free status of all derivatives of our free software and 256 | of promoting the sharing and reuse of software generally. 257 | 258 | NO WARRANTY 259 | 260 | 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY 261 | FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN 262 | OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES 263 | PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED 264 | OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 265 | MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS 266 | TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE 267 | PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, 268 | REPAIR OR CORRECTION. 269 | 270 | 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING 271 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR 272 | REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, 273 | INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING 274 | OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED 275 | TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY 276 | YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER 277 | PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE 278 | POSSIBILITY OF SUCH DAMAGES. 
279 | 280 | END OF TERMS AND CONDITIONS 281 | 282 | How to Apply These Terms to Your New Programs 283 | 284 | If you develop a new program, and you want it to be of the greatest 285 | possible use to the public, the best way to achieve this is to make it 286 | free software which everyone can redistribute and change under these terms. 287 | 288 | To do so, attach the following notices to the program. It is safest 289 | to attach them to the start of each source file to most effectively 290 | convey the exclusion of warranty; and each file should have at least 291 | the "copyright" line and a pointer to where the full notice is found. 292 | 293 | {description} 294 | Copyright (C) {year} {fullname} 295 | 296 | This program is free software; you can redistribute it and/or modify 297 | it under the terms of the GNU General Public License as published by 298 | the Free Software Foundation; either version 2 of the License, or 299 | (at your option) any later version. 300 | 301 | This program is distributed in the hope that it will be useful, 302 | but WITHOUT ANY WARRANTY; without even the implied warranty of 303 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 304 | GNU General Public License for more details. 305 | 306 | You should have received a copy of the GNU General Public License along 307 | with this program; if not, write to the Free Software Foundation, Inc., 308 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 309 | 310 | Also add information on how to contact you by electronic and paper mail. 311 | 312 | If the program is interactive, make it output a short notice like this 313 | when it starts in an interactive mode: 314 | 315 | Gnomovision version 69, Copyright (C) year name of author 316 | Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. 317 | This is free software, and you are welcome to redistribute it 318 | under certain conditions; type `show c' for details. 
319 | 320 | The hypothetical commands `show w' and `show c' should show the appropriate 321 | parts of the General Public License. Of course, the commands you use may 322 | be called something other than `show w' and `show c'; they could even be 323 | mouse-clicks or menu items--whatever suits your program. 324 | 325 | You should also get your employer (if you work as a programmer) or your 326 | school, if any, to sign a "copyright disclaimer" for the program, if 327 | necessary. Here is a sample; alter the names: 328 | 329 | Yoyodyne, Inc., hereby disclaims all copyright interest in the program 330 | `Gnomovision' (which makes passes at compilers) written by James Hacker. 331 | 332 | {signature of Ty Coon}, 1 April 1989 333 | Ty Coon, President of Vice 334 | 335 | This General Public License does not permit incorporating your program into 336 | proprietary programs. If your program is a subroutine library, you may 337 | consider it more useful to permit linking proprietary applications with the 338 | library. If this is what you want to do, use the GNU Lesser General 339 | Public License instead of this License. 340 | 341 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # V2.x parallel_sync Documentation 2 | 3 | Documentation for the older versions of the package is at: V1_Doc 4 | 5 | **Introduction** 6 | 7 | parallel_sync is a python package for uploading or downloading files using multiprocessing and md5 checks. It can do operations such as rsync, scp, wget. 8 | It can be used on Windows, Linux, and Mac OS. Note that on Windows, you need to have OpenSSH enabled and the package will automatically use scp instead of rsync. 9 | 10 | **How to install:** 11 | 12 | `pip install parallel_sync` 13 | 14 | **Requirement:** 15 | - Python >= 3 16 | - ssh service must be installed and running. 
17 | - if rsync is installed on the local machine, it will be used, otherwise it will fall back to using scp. 18 | - To use the wget method, you need to install wget on the target machine 19 | - To untar/unzip files you need tar/zip packages installed on the target machine 20 | 21 | **Benefits:** 22 | - Very fast file transfer (parallelized) 23 | - If the file exists and is not changed, it will not waste time copying it 24 | - You can specify retries in case you have a bad connection 25 | - It can handle large files 26 | 27 | In most of the examples below, you can specify `parallelism` and `tries` which allow you to parallelize tasks and retry upon failure. 28 | By default `parallelism` is set to 10 workers and tries is 1. 29 | 30 | ## Upstream Example: 31 | ```python 32 | from parallel_sync import rsync, Credential 33 | creds = Credential(username='user', 34 | hostname='192.168.168.9', 35 | port=3022, 36 | key_filename='~/.ssh/id_rsa') 37 | rsync.upload('/tmp/x', '/tmp/y', creds=creds, exclude=['*.pyc', '*.sh']) 38 | ``` 39 | 40 | ## Downstream Example: 41 | 42 | ```python 43 | from parallel_sync import rsync 44 | creds = {'user': 'myusername', 'key':'~/.ssh/id_rsa', 'host':'192.168.16.31'} 45 | rsync.download('/tmp/y', '/tmp/z', creds=creds) 46 | ``` 47 | 48 | ## Using non-default Ports 49 | ```python 50 | from parallel_sync import rsync, Credential 51 | creds = Credential(username='user', 52 | hostname='192.168.168.9', 53 | port=3022, 54 | key_filename='~/.ssh/id_rsa') 55 | rsync.download('/tmp/y', '/tmp/z', creds=creds) 56 | ``` 57 | 58 | 59 | ## Downloading files on a remote machine: 60 | 61 | For this, you need to have wget installed on the remote machine. 
62 | ```python 63 | from parallel_sync import wget, Credential 64 | creds = Credential(username='user', 65 | hostname='192.168.168.9', 66 | port=3022, 67 | key_filename='~/.ssh/id_rsa') 68 | urls = ['http://something.png', 'http://somthing.tar.gz', 'http://somthing.zip'] 69 | wget.download('/tmp', urls=urls, creds=creds) 70 | ``` 71 | 72 | ## Downloading files on the local machine 73 | Downloading files using the requests package locally is simple but what if you want to parallelize it? 74 | Here is the solution for that: 75 | ```python 76 | from parallel_sync import downloader 77 | urls = ['http://something1', 'http://somthing2', 'http://somthing3'] 78 | download('c:/temp/x', 79 | extension='.png', parallelism=10) 80 | ``` 81 | 82 | ## Integration with Fabric: 83 | ``` 84 | from fabric import task 85 | from parallel_sync import rsync, wget, get_fabric_credentials 86 | 87 | @task 88 | def deploy(conn): 89 | creds = get_fabric_credentials(conn) 90 | urls = ['http://something1', 'http://somthing2', 'http://somthing3'] 91 | wget.download(creds, '/tmp/images', urls) 92 | rsync.upload('/src', '/dst', creds, tries=3) 93 | ``` 94 | 95 | Here you have a task called deploy. You can run it using the following command: 96 | ``` 97 | fab [user]@[hostname]:[port] -i [path to your key file] deploy 98 | ``` 99 | 100 | If you come across any bugs, please report them on GitHub. 101 | -------------------------------------------------------------------------------- /V1_Doc.md: -------------------------------------------------------------------------------- 1 | 2 | # V1.x parallel_sync Documentation 3 | 4 | **Introduction** 5 | 6 | parallel_sync is a python package for uploading or downloading files using multiprocessing and md5 checks on Linux. 7 | The files can be transferred from a remote linux host or a url. 8 | 9 | **How to install:** 10 | 11 | `pip install parallel_sync` 12 | 13 | **Requirement:** 14 | - Python >= 2.6 Linux Only! 15 | - ssh service must be installed and running. 
16 | - To use the rsync features, you need to have rsync installed. 17 | - to use the url module, you need to install wget on the target machine 18 | - To untar/unzip files you need tar/zip packages installed 19 | 20 | **Benefits:** 21 | - Very fast file transfer (parallelized) 22 | - If the file exists and is not changed, it will not waste time copying it 23 | - You can specify retries in case you have a bad connection 24 | - It can handle large files 25 | 26 | In most of the examples below, you can specify `parallelism` and `tries` which allow you to parallelize tasks and retry upon failure. 27 | By default `parallelism` is set to 10 workers. 28 | 29 | ## Upstream Example: 30 | ```python 31 | from parallel_sync import rsync 32 | creds = {'user': 'myusername', 'key':'~/.ssh/id_rsa', 'host':'192.168.16.31'} 33 | rsync.upload('/tmp/x', '/tmp/y', creds=creds, exclude=['*.pyc', '*.sh']) 34 | ``` 35 | 36 | ## Downstream Example: 37 | 38 | ```python 39 | from parallel_sync import rsync 40 | creds = {'user': 'myusername', 'key':'~/.ssh/id_rsa', 'host':'192.168.16.31'} 41 | rsync.download('/tmp/y', '/tmp/z', creds=creds) 42 | ``` 43 | 44 | ## Using non-default Ports 45 | ```python 46 | from parallel_sync import rsync 47 | creds = {'user': 'myusername', 'key':'~/.ssh/id_rsa', 'host':'192.168.16.31', 'port': 1234} 48 | rsync.download('/tmp/y', '/tmp/z', creds=creds) 49 | ``` 50 | 51 | 52 | ## File Download Example: 53 | 54 | ```python 55 | from parallel_sync import wget 56 | urls = ['http://something.png', 'http://somthing.tar.gz', 'http://somthing.zip'] 57 | wget.download('/tmp', urls=urls, extract=True) 58 | 59 | # download locally with a specified filename: 60 | 61 | wget.download(LOCAL_TARGET, 'http://something/else/file.zip',\ 62 | filenames='x.zip', extract=True) 63 | 64 | # download on a remote machine: 65 | 66 | creds = {'user': 'myusername', 'key':'~/.ssh/id_rsa', 'host':'192.168.16.31'} 67 | wget.download('/tmp', urls=urls, creds=creds) 68 | 69 | # To untar or 
unzip compressed files after download: 70 | wget.download('/tmp', urls=urls, creds=creds, extract=True) 71 | ``` 72 | 73 | Example extracting a file on a remote host: 74 | 75 | ```python 76 | creds = {'user': 'myusername', 'key':'~/.ssh/id_rsa', 'host':'192.168.16.31'} 77 | from parallel_sync import compression 78 | compression.extract('/tmp/x.tar.gz', creds=creds) 79 | ``` 80 | 81 | Example checking that a file exists on the remote server: 82 | 83 | ```python 84 | from parallel_sync import executor 85 | creds = {'user': 'myusername', 'key':'~/.ssh/id_rsa', 'host':'192.168.16.31'} 86 | path = '/tmp/myfile' 87 | if executor.path_exists(path, creds): 88 | print("yes") 89 | ``` 90 | 91 | Example finding files or directories on a remote server: 92 | 93 | ```python 94 | from parallel_sync import executor 95 | creds = {'user': 'myusername', 'key':'~/.ssh/id_rsa', 'host':'192.168.16.31'} 96 | dir_path = '/tmp/mydir' 97 | files = executor.find_files(dir_path, creds, include=['*.png', '*.jpg']) 98 | 99 | dirs = executor.find_dirs(dir_path, creds, include=['test']) 100 | 101 | # Note that if creds is None, then it will search on localhost 102 | ``` 103 | 104 | Example Running commands: 105 | 106 | ```python 107 | from parallel_sync import executor 108 | 109 | cmds = ['mv /tmp/x /tmp/y', 'touch /tmp/z'] 110 | creds = {'user': 'myusername', 'key':'~/.ssh/id_rsa', 'host':'192.168.16.31'} 111 | executor.run(cmds, creds=creds, parallelism=len(cmds)) 112 | 113 | print(executor.run('pwd', creds=creds, curr_dir='/tmp')) 114 | ``` 115 | 116 | Example using parallel_sync within fabric: 117 | 118 | ```python 119 | from fabric.api import env 120 | from parallel_sync import rsync 121 | 122 | rsync.upload('/tmp/x', '/tmp/y', creds=env) 123 | rsync.download('/tmp/y', '/tmp/z', creds=env) 124 | ``` 125 | 126 | To transfer files locally: 127 | 128 | ```python 129 | from parallel_sync import rsync 130 | rsync.copy('/tmp/x', '/tmp/y', exclude=['*.pyc'], parallelism=10, extract=False, 
validate=False) 131 | ``` 132 | 133 | where /tmp/x is a directory. 134 | 135 | 136 | If you come across any bugs, please report it on github. 137 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | PAPER = 8 | BUILDDIR = _build 9 | 10 | # User-friendly check for sphinx-build 11 | ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1) 12 | $(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don't have Sphinx installed, grab it from http://sphinx-doc.org/) 13 | endif 14 | 15 | # Internal variables. 16 | PAPEROPT_a4 = -D latex_paper_size=a4 17 | PAPEROPT_letter = -D latex_paper_size=letter 18 | ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . 19 | # the i18n builder cannot share the environment and doctrees with the others 20 | I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . 
21 | 22 | .PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest coverage gettext 23 | 24 | help: 25 | @echo "Please use \`make ' where is one of" 26 | @echo " html to make standalone HTML files" 27 | @echo " dirhtml to make HTML files named index.html in directories" 28 | @echo " singlehtml to make a single large HTML file" 29 | @echo " pickle to make pickle files" 30 | @echo " json to make JSON files" 31 | @echo " htmlhelp to make HTML files and a HTML help project" 32 | @echo " qthelp to make HTML files and a qthelp project" 33 | @echo " applehelp to make an Apple Help Book" 34 | @echo " devhelp to make HTML files and a Devhelp project" 35 | @echo " epub to make an epub" 36 | @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" 37 | @echo " latexpdf to make LaTeX files and run them through pdflatex" 38 | @echo " latexpdfja to make LaTeX files and run them through platex/dvipdfmx" 39 | @echo " text to make text files" 40 | @echo " man to make manual pages" 41 | @echo " texinfo to make Texinfo files" 42 | @echo " info to make Texinfo files and run them through makeinfo" 43 | @echo " gettext to make PO message catalogs" 44 | @echo " changes to make an overview of all changed/added/deprecated items" 45 | @echo " xml to make Docutils-native XML files" 46 | @echo " pseudoxml to make pseudoxml-XML files for display purposes" 47 | @echo " linkcheck to check all external links for integrity" 48 | @echo " doctest to run all doctests embedded in the documentation (if enabled)" 49 | @echo " coverage to run coverage check of the documentation (if enabled)" 50 | 51 | clean: 52 | rm -rf $(BUILDDIR)/* 53 | 54 | html: 55 | $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html 56 | @echo 57 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." 58 | 59 | dirhtml: 60 | $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml 61 | @echo 62 | @echo "Build finished. 
The HTML pages are in $(BUILDDIR)/dirhtml." 63 | 64 | singlehtml: 65 | $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml 66 | @echo 67 | @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml." 68 | 69 | pickle: 70 | $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle 71 | @echo 72 | @echo "Build finished; now you can process the pickle files." 73 | 74 | json: 75 | $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json 76 | @echo 77 | @echo "Build finished; now you can process the JSON files." 78 | 79 | htmlhelp: 80 | $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp 81 | @echo 82 | @echo "Build finished; now you can run HTML Help Workshop with the" \ 83 | ".hhp project file in $(BUILDDIR)/htmlhelp." 84 | 85 | qthelp: 86 | $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp 87 | @echo 88 | @echo "Build finished; now you can run "qcollectiongenerator" with the" \ 89 | ".qhcp project file in $(BUILDDIR)/qthelp, like this:" 90 | @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/parallel_sync.qhcp" 91 | @echo "To view the help file:" 92 | @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/parallel_sync.qhc" 93 | 94 | applehelp: 95 | $(SPHINXBUILD) -b applehelp $(ALLSPHINXOPTS) $(BUILDDIR)/applehelp 96 | @echo 97 | @echo "Build finished. The help book is in $(BUILDDIR)/applehelp." 98 | @echo "N.B. You won't be able to view it unless you put it in" \ 99 | "~/Library/Documentation/Help or install it in your application" \ 100 | "bundle." 101 | 102 | devhelp: 103 | $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp 104 | @echo 105 | @echo "Build finished." 106 | @echo "To view the help file:" 107 | @echo "# mkdir -p $$HOME/.local/share/devhelp/parallel_sync" 108 | @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/parallel_sync" 109 | @echo "# devhelp" 110 | 111 | epub: 112 | $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub 113 | @echo 114 | @echo "Build finished. 
The epub file is in $(BUILDDIR)/epub." 115 | 116 | latex: 117 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 118 | @echo 119 | @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." 120 | @echo "Run \`make' in that directory to run these through (pdf)latex" \ 121 | "(use \`make latexpdf' here to do that automatically)." 122 | 123 | latexpdf: 124 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 125 | @echo "Running LaTeX files through pdflatex..." 126 | $(MAKE) -C $(BUILDDIR)/latex all-pdf 127 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 128 | 129 | latexpdfja: 130 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 131 | @echo "Running LaTeX files through platex and dvipdfmx..." 132 | $(MAKE) -C $(BUILDDIR)/latex all-pdf-ja 133 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 134 | 135 | text: 136 | $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text 137 | @echo 138 | @echo "Build finished. The text files are in $(BUILDDIR)/text." 139 | 140 | man: 141 | $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man 142 | @echo 143 | @echo "Build finished. The manual pages are in $(BUILDDIR)/man." 144 | 145 | texinfo: 146 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 147 | @echo 148 | @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo." 149 | @echo "Run \`make' in that directory to run these through makeinfo" \ 150 | "(use \`make info' here to do that automatically)." 151 | 152 | info: 153 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 154 | @echo "Running Texinfo files through makeinfo..." 155 | make -C $(BUILDDIR)/texinfo info 156 | @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." 157 | 158 | gettext: 159 | $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale 160 | @echo 161 | @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale." 
162 | 163 | changes: 164 | $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes 165 | @echo 166 | @echo "The overview file is in $(BUILDDIR)/changes." 167 | 168 | linkcheck: 169 | $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck 170 | @echo 171 | @echo "Link check complete; look for any errors in the above output " \ 172 | "or in $(BUILDDIR)/linkcheck/output.txt." 173 | 174 | doctest: 175 | $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest 176 | @echo "Testing of doctests in the sources finished, look at the " \ 177 | "results in $(BUILDDIR)/doctest/output.txt." 178 | 179 | coverage: 180 | $(SPHINXBUILD) -b coverage $(ALLSPHINXOPTS) $(BUILDDIR)/coverage 181 | @echo "Testing of coverage in the sources finished, look at the " \ 182 | "results in $(BUILDDIR)/coverage/python.txt." 183 | 184 | xml: 185 | $(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml 186 | @echo 187 | @echo "Build finished. The XML files are in $(BUILDDIR)/xml." 188 | 189 | pseudoxml: 190 | $(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml 191 | @echo 192 | @echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml." 193 | -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # parallel_sync documentation build configuration file, created by 4 | # sphinx-quickstart on Mon Oct 26 12:02:00 2015. 5 | # 6 | # This file is execfile()d with the current directory set to its 7 | # containing dir. 8 | # 9 | # Note that not all possible configuration values are present in this 10 | # autogenerated file. 11 | # 12 | # All configuration values have a default; values that are commented out 13 | # serve to show the default. 
14 | 15 | import sys 16 | import os 17 | import shlex 18 | 19 | # If extensions (or modules to document with autodoc) are in another directory, 20 | # add these directories to sys.path here. If the directory is relative to the 21 | # documentation root, use os.path.abspath to make it absolute, like shown here. 22 | #sys.path.insert(0, os.path.abspath('.')) 23 | 24 | # -- General configuration ------------------------------------------------ 25 | 26 | # If your documentation needs a minimal Sphinx version, state it here. 27 | #needs_sphinx = '1.0' 28 | 29 | # Add any Sphinx extension module names here, as strings. They can be 30 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 31 | # ones. 32 | extensions = [ 33 | 'sphinx.ext.autodoc', 34 | ] 35 | 36 | # Add any paths that contain templates here, relative to this directory. 37 | templates_path = ['_templates'] 38 | 39 | # The suffix(es) of source filenames. 40 | # You can specify multiple suffix as a list of string: 41 | # source_suffix = ['.rst', '.md'] 42 | source_suffix = '.rst' 43 | 44 | # The encoding of source files. 45 | #source_encoding = 'utf-8-sig' 46 | 47 | # The master toctree document. 48 | master_doc = 'index' 49 | 50 | # General information about the project. 51 | project = u'parallel_sync' 52 | copyright = u'2015, Kourosh Parsa' 53 | author = u'Kourosh Parsa' 54 | 55 | # The version info for the project you're documenting, acts as replacement for 56 | # |version| and |release|, also used in various other places throughout the 57 | # built documents. 58 | # 59 | # The short X.Y version. 60 | version = '1.5' 61 | # The full version, including alpha/beta/rc tags. 62 | release = '1.5' 63 | 64 | # The language for content autogenerated by Sphinx. Refer to documentation 65 | # for a list of supported languages. 66 | # 67 | # This is also used if you do content translation via gettext catalogs. 68 | # Usually you set "language" from the command line for these cases. 
69 | language = None 70 | 71 | # There are two options for replacing |today|: either, you set today to some 72 | # non-false value, then it is used: 73 | #today = '' 74 | # Else, today_fmt is used as the format for a strftime call. 75 | #today_fmt = '%B %d, %Y' 76 | 77 | # List of patterns, relative to source directory, that match files and 78 | # directories to ignore when looking for source files. 79 | exclude_patterns = ['_build'] 80 | 81 | # The reST default role (used for this markup: `text`) to use for all 82 | # documents. 83 | #default_role = None 84 | 85 | # If true, '()' will be appended to :func: etc. cross-reference text. 86 | #add_function_parentheses = True 87 | 88 | # If true, the current module name will be prepended to all description 89 | # unit titles (such as .. function::). 90 | #add_module_names = True 91 | 92 | # If true, sectionauthor and moduleauthor directives will be shown in the 93 | # output. They are ignored by default. 94 | #show_authors = False 95 | 96 | # The name of the Pygments (syntax highlighting) style to use. 97 | pygments_style = 'sphinx' 98 | 99 | # A list of ignored prefixes for module index sorting. 100 | #modindex_common_prefix = [] 101 | 102 | # If true, keep warnings as "system message" paragraphs in the built documents. 103 | #keep_warnings = False 104 | 105 | # If true, `todo` and `todoList` produce output, else they produce nothing. 106 | todo_include_todos = False 107 | 108 | 109 | # -- Options for HTML output ---------------------------------------------- 110 | 111 | # The theme to use for HTML and HTML Help pages. See the documentation for 112 | # a list of builtin themes. 113 | html_theme = 'alabaster' 114 | 115 | # Theme options are theme-specific and customize the look and feel of a theme 116 | # further. For a list of options available for each theme, see the 117 | # documentation. 118 | #html_theme_options = {} 119 | 120 | # Add any paths that contain custom themes here, relative to this directory. 
121 | #html_theme_path = [] 122 | 123 | # The name for this set of Sphinx documents. If None, it defaults to 124 | # " v documentation". 125 | #html_title = None 126 | 127 | # A shorter title for the navigation bar. Default is the same as html_title. 128 | #html_short_title = None 129 | 130 | # The name of an image file (relative to this directory) to place at the top 131 | # of the sidebar. 132 | #html_logo = None 133 | 134 | # The name of an image file (within the static path) to use as favicon of the 135 | # docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 136 | # pixels large. 137 | #html_favicon = None 138 | 139 | # Add any paths that contain custom static files (such as style sheets) here, 140 | # relative to this directory. They are copied after the builtin static files, 141 | # so a file named "default.css" will overwrite the builtin "default.css". 142 | html_static_path = ['_static'] 143 | 144 | # Add any extra paths that contain custom files (such as robots.txt or 145 | # .htaccess) here, relative to this directory. These files are copied 146 | # directly to the root of the documentation. 147 | #html_extra_path = [] 148 | 149 | # If not '', a 'Last updated on:' timestamp is inserted at every page bottom, 150 | # using the given strftime format. 151 | #html_last_updated_fmt = '%b %d, %Y' 152 | 153 | # If true, SmartyPants will be used to convert quotes and dashes to 154 | # typographically correct entities. 155 | #html_use_smartypants = True 156 | 157 | # Custom sidebar templates, maps document names to template names. 158 | #html_sidebars = {} 159 | 160 | # Additional templates that should be rendered to pages, maps page names to 161 | # template names. 162 | #html_additional_pages = {} 163 | 164 | # If false, no module index is generated. 165 | #html_domain_indices = True 166 | 167 | # If false, no index is generated. 168 | #html_use_index = True 169 | 170 | # If true, the index is split into individual pages for each letter. 
171 | #html_split_index = False 172 | 173 | # If true, links to the reST sources are added to the pages. 174 | #html_show_sourcelink = True 175 | 176 | # If true, "Created using Sphinx" is shown in the HTML footer. Default is True. 177 | #html_show_sphinx = True 178 | 179 | # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. 180 | #html_show_copyright = True 181 | 182 | # If true, an OpenSearch description file will be output, and all pages will 183 | # contain a tag referring to it. The value of this option must be the 184 | # base URL from which the finished HTML is served. 185 | #html_use_opensearch = '' 186 | 187 | # This is the file name suffix for HTML files (e.g. ".xhtml"). 188 | #html_file_suffix = None 189 | 190 | # Language to be used for generating the HTML full-text search index. 191 | # Sphinx supports the following languages: 192 | # 'da', 'de', 'en', 'es', 'fi', 'fr', 'hu', 'it', 'ja' 193 | # 'nl', 'no', 'pt', 'ro', 'ru', 'sv', 'tr' 194 | #html_search_language = 'en' 195 | 196 | # A dictionary with options for the search language support, empty by default. 197 | # Now only 'ja' uses this config value 198 | #html_search_options = {'type': 'default'} 199 | 200 | # The name of a javascript file (relative to the configuration directory) that 201 | # implements a search results scorer. If empty, the default will be used. 202 | #html_search_scorer = 'scorer.js' 203 | 204 | # Output file base name for HTML help builder. 205 | htmlhelp_basename = 'parallel_syncdoc' 206 | 207 | # -- Options for LaTeX output --------------------------------------------- 208 | 209 | latex_elements = { 210 | # The paper size ('letterpaper' or 'a4paper'). 211 | #'papersize': 'letterpaper', 212 | 213 | # The font size ('10pt', '11pt' or '12pt'). 214 | #'pointsize': '10pt', 215 | 216 | # Additional stuff for the LaTeX preamble. 
217 | #'preamble': '', 218 | 219 | # Latex figure (float) alignment 220 | #'figure_align': 'htbp', 221 | } 222 | 223 | # Grouping the document tree into LaTeX files. List of tuples 224 | # (source start file, target name, title, 225 | # author, documentclass [howto, manual, or own class]). 226 | latex_documents = [ 227 | (master_doc, 'parallel_sync.tex', u'parallel\\_sync Documentation', 228 | u'Kourosh Parsa', 'manual'), 229 | ] 230 | 231 | # The name of an image file (relative to this directory) to place at the top of 232 | # the title page. 233 | #latex_logo = None 234 | 235 | # For "manual" documents, if this is true, then toplevel headings are parts, 236 | # not chapters. 237 | #latex_use_parts = False 238 | 239 | # If true, show page references after internal links. 240 | #latex_show_pagerefs = False 241 | 242 | # If true, show URL addresses after external links. 243 | #latex_show_urls = False 244 | 245 | # Documents to append as an appendix to all manuals. 246 | #latex_appendices = [] 247 | 248 | # If false, no module index is generated. 249 | #latex_domain_indices = True 250 | 251 | 252 | # -- Options for manual page output --------------------------------------- 253 | 254 | # One entry per manual page. List of tuples 255 | # (source start file, name, description, authors, manual section). 256 | man_pages = [ 257 | (master_doc, 'parallel_sync', u'parallel_sync Documentation', 258 | [author], 1) 259 | ] 260 | 261 | # If true, show URL addresses after external links. 262 | #man_show_urls = False 263 | 264 | 265 | # -- Options for Texinfo output ------------------------------------------- 266 | 267 | # Grouping the document tree into Texinfo files. 
List of tuples 268 | # (source start file, target name, title, author, 269 | # dir menu entry, description, category) 270 | texinfo_documents = [ 271 | (master_doc, 'parallel_sync', u'parallel_sync Documentation', 272 | author, 'parallel_sync', 'One line description of project.', 273 | 'Miscellaneous'), 274 | ] 275 | 276 | # Documents to append as an appendix to all manuals. 277 | #texinfo_appendices = [] 278 | 279 | # If false, no module index is generated. 280 | #texinfo_domain_indices = True 281 | 282 | # How to display URL addresses: 'footnote', 'no', or 'inline'. 283 | #texinfo_show_urls = 'footnote' 284 | 285 | # If true, do not generate a @detailmenu in the "Top" node's menu. 286 | #texinfo_no_detailmenu = False 287 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | .. parallel_sync documentation master file, created by 2 | sphinx-quickstart on Mon Oct 26 12:02:00 2015. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | Welcome to parallel_sync's documentation! 7 | ========================================= 8 | 9 | Contents: 10 | 11 | .. toctree:: 12 | :maxdepth: 2 13 | 14 | pages/requirements 15 | pages/installation 16 | pages/examples 17 | 18 | Indices and tables 19 | ================== 20 | 21 | * :ref:`genindex` 22 | * :ref:`modindex` 23 | * :ref:`search` 24 | 25 | -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | REM Command file for Sphinx documentation 4 | 5 | if "%SPHINXBUILD%" == "" ( 6 | set SPHINXBUILD=sphinx-build 7 | ) 8 | set BUILDDIR=_build 9 | set ALLSPHINXOPTS=-d %BUILDDIR%/doctrees %SPHINXOPTS% . 10 | set I18NSPHINXOPTS=%SPHINXOPTS% . 
11 | if NOT "%PAPER%" == "" ( 12 | set ALLSPHINXOPTS=-D latex_paper_size=%PAPER% %ALLSPHINXOPTS% 13 | set I18NSPHINXOPTS=-D latex_paper_size=%PAPER% %I18NSPHINXOPTS% 14 | ) 15 | 16 | if "%1" == "" goto help 17 | 18 | if "%1" == "help" ( 19 | :help 20 | echo.Please use `make ^` where ^ is one of 21 | echo. html to make standalone HTML files 22 | echo. dirhtml to make HTML files named index.html in directories 23 | echo. singlehtml to make a single large HTML file 24 | echo. pickle to make pickle files 25 | echo. json to make JSON files 26 | echo. htmlhelp to make HTML files and a HTML help project 27 | echo. qthelp to make HTML files and a qthelp project 28 | echo. devhelp to make HTML files and a Devhelp project 29 | echo. epub to make an epub 30 | echo. latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter 31 | echo. text to make text files 32 | echo. man to make manual pages 33 | echo. texinfo to make Texinfo files 34 | echo. gettext to make PO message catalogs 35 | echo. changes to make an overview over all changed/added/deprecated items 36 | echo. xml to make Docutils-native XML files 37 | echo. pseudoxml to make pseudoxml-XML files for display purposes 38 | echo. linkcheck to check all external links for integrity 39 | echo. doctest to run all doctests embedded in the documentation if enabled 40 | echo. coverage to run coverage check of the documentation if enabled 41 | goto end 42 | ) 43 | 44 | if "%1" == "clean" ( 45 | for /d %%i in (%BUILDDIR%\*) do rmdir /q /s %%i 46 | del /q /s %BUILDDIR%\* 47 | goto end 48 | ) 49 | 50 | 51 | REM Check if sphinx-build is available and fallback to Python version if any 52 | %SPHINXBUILD% 2> nul 53 | if errorlevel 9009 goto sphinx_python 54 | goto sphinx_ok 55 | 56 | :sphinx_python 57 | 58 | set SPHINXBUILD=python -m sphinx.__init__ 59 | %SPHINXBUILD% 2> nul 60 | if errorlevel 9009 ( 61 | echo. 62 | echo.The 'sphinx-build' command was not found. 
Make sure you have Sphinx 63 | echo.installed, then set the SPHINXBUILD environment variable to point 64 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 65 | echo.may add the Sphinx directory to PATH. 66 | echo. 67 | echo.If you don't have Sphinx installed, grab it from 68 | echo.http://sphinx-doc.org/ 69 | exit /b 1 70 | ) 71 | 72 | :sphinx_ok 73 | 74 | 75 | if "%1" == "html" ( 76 | %SPHINXBUILD% -b html %ALLSPHINXOPTS% %BUILDDIR%/html 77 | if errorlevel 1 exit /b 1 78 | echo. 79 | echo.Build finished. The HTML pages are in %BUILDDIR%/html. 80 | goto end 81 | ) 82 | 83 | if "%1" == "dirhtml" ( 84 | %SPHINXBUILD% -b dirhtml %ALLSPHINXOPTS% %BUILDDIR%/dirhtml 85 | if errorlevel 1 exit /b 1 86 | echo. 87 | echo.Build finished. The HTML pages are in %BUILDDIR%/dirhtml. 88 | goto end 89 | ) 90 | 91 | if "%1" == "singlehtml" ( 92 | %SPHINXBUILD% -b singlehtml %ALLSPHINXOPTS% %BUILDDIR%/singlehtml 93 | if errorlevel 1 exit /b 1 94 | echo. 95 | echo.Build finished. The HTML pages are in %BUILDDIR%/singlehtml. 96 | goto end 97 | ) 98 | 99 | if "%1" == "pickle" ( 100 | %SPHINXBUILD% -b pickle %ALLSPHINXOPTS% %BUILDDIR%/pickle 101 | if errorlevel 1 exit /b 1 102 | echo. 103 | echo.Build finished; now you can process the pickle files. 104 | goto end 105 | ) 106 | 107 | if "%1" == "json" ( 108 | %SPHINXBUILD% -b json %ALLSPHINXOPTS% %BUILDDIR%/json 109 | if errorlevel 1 exit /b 1 110 | echo. 111 | echo.Build finished; now you can process the JSON files. 112 | goto end 113 | ) 114 | 115 | if "%1" == "htmlhelp" ( 116 | %SPHINXBUILD% -b htmlhelp %ALLSPHINXOPTS% %BUILDDIR%/htmlhelp 117 | if errorlevel 1 exit /b 1 118 | echo. 119 | echo.Build finished; now you can run HTML Help Workshop with the ^ 120 | .hhp project file in %BUILDDIR%/htmlhelp. 121 | goto end 122 | ) 123 | 124 | if "%1" == "qthelp" ( 125 | %SPHINXBUILD% -b qthelp %ALLSPHINXOPTS% %BUILDDIR%/qthelp 126 | if errorlevel 1 exit /b 1 127 | echo. 
128 | echo.Build finished; now you can run "qcollectiongenerator" with the ^ 129 | .qhcp project file in %BUILDDIR%/qthelp, like this: 130 | echo.^> qcollectiongenerator %BUILDDIR%\qthelp\parallel_sync.qhcp 131 | echo.To view the help file: 132 | echo.^> assistant -collectionFile %BUILDDIR%\qthelp\parallel_sync.ghc 133 | goto end 134 | ) 135 | 136 | if "%1" == "devhelp" ( 137 | %SPHINXBUILD% -b devhelp %ALLSPHINXOPTS% %BUILDDIR%/devhelp 138 | if errorlevel 1 exit /b 1 139 | echo. 140 | echo.Build finished. 141 | goto end 142 | ) 143 | 144 | if "%1" == "epub" ( 145 | %SPHINXBUILD% -b epub %ALLSPHINXOPTS% %BUILDDIR%/epub 146 | if errorlevel 1 exit /b 1 147 | echo. 148 | echo.Build finished. The epub file is in %BUILDDIR%/epub. 149 | goto end 150 | ) 151 | 152 | if "%1" == "latex" ( 153 | %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex 154 | if errorlevel 1 exit /b 1 155 | echo. 156 | echo.Build finished; the LaTeX files are in %BUILDDIR%/latex. 157 | goto end 158 | ) 159 | 160 | if "%1" == "latexpdf" ( 161 | %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex 162 | cd %BUILDDIR%/latex 163 | make all-pdf 164 | cd %~dp0 165 | echo. 166 | echo.Build finished; the PDF files are in %BUILDDIR%/latex. 167 | goto end 168 | ) 169 | 170 | if "%1" == "latexpdfja" ( 171 | %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex 172 | cd %BUILDDIR%/latex 173 | make all-pdf-ja 174 | cd %~dp0 175 | echo. 176 | echo.Build finished; the PDF files are in %BUILDDIR%/latex. 177 | goto end 178 | ) 179 | 180 | if "%1" == "text" ( 181 | %SPHINXBUILD% -b text %ALLSPHINXOPTS% %BUILDDIR%/text 182 | if errorlevel 1 exit /b 1 183 | echo. 184 | echo.Build finished. The text files are in %BUILDDIR%/text. 185 | goto end 186 | ) 187 | 188 | if "%1" == "man" ( 189 | %SPHINXBUILD% -b man %ALLSPHINXOPTS% %BUILDDIR%/man 190 | if errorlevel 1 exit /b 1 191 | echo. 192 | echo.Build finished. The manual pages are in %BUILDDIR%/man. 
193 | goto end 194 | ) 195 | 196 | if "%1" == "texinfo" ( 197 | %SPHINXBUILD% -b texinfo %ALLSPHINXOPTS% %BUILDDIR%/texinfo 198 | if errorlevel 1 exit /b 1 199 | echo. 200 | echo.Build finished. The Texinfo files are in %BUILDDIR%/texinfo. 201 | goto end 202 | ) 203 | 204 | if "%1" == "gettext" ( 205 | %SPHINXBUILD% -b gettext %I18NSPHINXOPTS% %BUILDDIR%/locale 206 | if errorlevel 1 exit /b 1 207 | echo. 208 | echo.Build finished. The message catalogs are in %BUILDDIR%/locale. 209 | goto end 210 | ) 211 | 212 | if "%1" == "changes" ( 213 | %SPHINXBUILD% -b changes %ALLSPHINXOPTS% %BUILDDIR%/changes 214 | if errorlevel 1 exit /b 1 215 | echo. 216 | echo.The overview file is in %BUILDDIR%/changes. 217 | goto end 218 | ) 219 | 220 | if "%1" == "linkcheck" ( 221 | %SPHINXBUILD% -b linkcheck %ALLSPHINXOPTS% %BUILDDIR%/linkcheck 222 | if errorlevel 1 exit /b 1 223 | echo. 224 | echo.Link check complete; look for any errors in the above output ^ 225 | or in %BUILDDIR%/linkcheck/output.txt. 226 | goto end 227 | ) 228 | 229 | if "%1" == "doctest" ( 230 | %SPHINXBUILD% -b doctest %ALLSPHINXOPTS% %BUILDDIR%/doctest 231 | if errorlevel 1 exit /b 1 232 | echo. 233 | echo.Testing of doctests in the sources finished, look at the ^ 234 | results in %BUILDDIR%/doctest/output.txt. 235 | goto end 236 | ) 237 | 238 | if "%1" == "coverage" ( 239 | %SPHINXBUILD% -b coverage %ALLSPHINXOPTS% %BUILDDIR%/coverage 240 | if errorlevel 1 exit /b 1 241 | echo. 242 | echo.Testing of coverage in the sources finished, look at the ^ 243 | results in %BUILDDIR%/coverage/python.txt. 244 | goto end 245 | ) 246 | 247 | if "%1" == "xml" ( 248 | %SPHINXBUILD% -b xml %ALLSPHINXOPTS% %BUILDDIR%/xml 249 | if errorlevel 1 exit /b 1 250 | echo. 251 | echo.Build finished. The XML files are in %BUILDDIR%/xml. 252 | goto end 253 | ) 254 | 255 | if "%1" == "pseudoxml" ( 256 | %SPHINXBUILD% -b pseudoxml %ALLSPHINXOPTS% %BUILDDIR%/pseudoxml 257 | if errorlevel 1 exit /b 1 258 | echo. 259 | echo.Build finished. 
The pseudo-XML files are in %BUILDDIR%/pseudoxml. 260 | goto end 261 | ) 262 | 263 | :end 264 | -------------------------------------------------------------------------------- /docs/pages/examples.rst: -------------------------------------------------------------------------------- 1 | parallel_sync Usage 2 | =================== 3 | 4 | parallel_sync is a python package for uploading or downloading files using multiprocessing and md5 checks on Linux. 5 | The files can be transferred from a remote linux host or a url. 6 | 7 | Benefits: 8 | * Very fast file transfer (parallelized) 9 | * If the file exists and is not changed, it will not waste time copying it 10 | * You can specify retries in case you have a bad connection 11 | * It can handle large files 12 | 13 | In most of the examples below, you can specify **parallelism** and **tries** which allow you to parallelize tasks and retry upon failure. 14 | By default **parallelism** is set to 10 workers. 15 | 16 | 17 | Upstream Example: 18 | :: 19 | from parallel_sync import rsync 20 | creds = {'user': 'myusername', 'key':'~/.ssh/id_rsa', 'host':'192.168.16.31'} 21 | rsync.upload('/tmp/local_dir', '/tmp/remote_dir', creds=creds) 22 | 23 | Downstream Example: 24 | :: 25 | from parallel_sync import rsync 26 | creds = {'user': 'myusername', 'key':'~/.ssh/id_rsa', 'host':'192.168.16.31'} 27 | rsync.download('/tmp/remote_dir', '/tmp/local_dir', creds=creds) 28 | 29 | 30 | File Download Example: 31 | :: 32 | from parallel_sync import wget 33 | urls = ['http://something.png', 'http://somthing.tar.gz', 'http://somthing.zip'] 34 | wget.download('/tmp', urls, extract=True) 35 | 36 | # download on a remote machine: 37 | creds = {'user': 'myusername', 'key':'~/.ssh/id_rsa', 'host':'192.168.16.31'} 38 | wget.download('/tmp', urls, creds=creds) 39 | 40 | # To untar or unzip compressed files after download: 41 | wget.download('/tmp', urls, creds=creds, extract=True) 42 | 43 | 44 | Example extracting a file on a remote host: 45 | :: 
46 | creds = {'user': 'myusername', 'key':'~/.ssh/id_rsa', 'host':'192.168.16.31'} 47 | from parallel_sync import compression 48 | compression.extract('/tmp/x.tar.gz', creds=creds) 49 | 50 | 51 | Example checking that a file exists: 52 | :: 53 | from parallel_sync import executor 54 | if executor.path_exists(path, creds): 55 | print "yes" 56 | 57 | 58 | Example finding files or directories: 59 | :: 60 | from parallel_sync import executor 61 | files = executor.find_files(dir_path, creds, include=['*.png', '*.jpg']) 62 | dirs = executor.find_dirs(dir_path, creds, include=['test']) 63 | 64 | Note that if creds is None, then it will search on localhost 65 | 66 | 67 | Example Running commands: 68 | :: 69 | from parallel_sync import executor 70 | cmds = ['mv /tmp/x /tmp/y', 'touch /tmp/z'] 71 | executor.run(cmds, creds=creds, parallelism=len(cmds)) 72 | print executor.run('pwd', creds=creds, curr_dir='/tmp') 73 | 74 | 75 | Example using parallel_sync within fabric: 76 | :: 77 | from fabric.api import env 78 | from parallel_sync import rsync 79 | rsync.upload('/tmp/x', '/tmp/y', creds=env) 80 | rsync.download('/tmp/y', '/tmp/z', creds=env) 81 | 82 | 83 | .. _github-link: https://github.com/kouroshparsa/parallel_sync 84 | If you come across any bugs, please report them on 85 | https://github.com/kouroshparsa/parallel_sync 86 | 87 | -------------------------------------------------------------------------------- /docs/pages/installation.rst: -------------------------------------------------------------------------------- 1 | Installation 2 | ============ 3 | 4 | It's as easy as: 5 | ``pip install parallel_sync`` 6 | -------------------------------------------------------------------------------- /docs/pages/requirements.rst: -------------------------------------------------------------------------------- 1 | Requirements 2 | ============ 3 | 4 | 1. Python 2.7 (version 3 is not supported yet) 5 | 2. Linux only 6 | 3.
@dataclass
class Credential:
    """SSH connection settings for one remote host.

    The field names are the keyword names handed to the SSH client:
    the executor module connects with ``client.connect(**creds.__dict__)``,
    so renaming or adding a field changes what is passed to that call.

    ``key_filename`` is expanded with ``os.path.expanduser`` on
    construction so that values such as ``'~/.ssh/id_rsa'`` resolve to a
    real path instead of being passed on with a literal ``~``.
    """
    key_filename: str  # path to the private key file; a leading "~" is expanded
    username: str  # login name on the remote host
    hostname: str  # host name or IP address to connect to
    port: int = 22  # SSH port
    timeout: int = 10 # seconds

    def __post_init__(self):
        # Imported locally so this block does not depend on a module-level
        # "import os" being present in the package __init__.
        import os

        # Only plain strings are expanded; any other value is left untouched
        # so existing callers that pass something else keep working.
        if isinstance(self.key_filename, str):
            self.key_filename = os.path.expanduser(self.key_filename)
def get_unzip_cmd(path: str):
    """
    Return the shell command prefix that extracts the archive at *path*.

    @path: str, file name or path; only its suffix is inspected and the
           comparison is case-insensitive
    returns the command as a string (the archive path is NOT appended),
    or None when the suffix is not a recognised archive type
    """
    # Ordered most-specific first: '.tar.gz' has to win over the bare '.gz'.
    commands = (
        (('.tar.gz', '.tgz'), 'tar -zxf'),
        (('.tar.bz2',), 'tar -jxf'),
        (('.tar',), 'tar -xf'),
        (('.gz',), 'gunzip'),
        (('.zip',), 'unzip'),
    )
    lowered = path.lower()
    for suffixes, command in commands:
        if lowered.endswith(suffixes):
            return command

    return None
def remote(cmd: str, creds: Credential, curr_dir: str=None):
    """ runs a single command on the remote machine
    @cmd: str, command to run on remote machine
    @creds: ssh credentials
    @curr_dir(optional): the current directory to run the command from.
        make_dirs_remote is called on it first (presumably to make sure it exists)
    returns the standard output of the command as a string
    raises Exception if the command exits with a non-zero status
    """
    client = paramiko.SSHClient()
    client.set_missing_host_key_policy(paramiko.AutoAddPolicy())
    try:
        client.connect(**creds.__dict__)
        if curr_dir is not None:
            make_dirs_remote({curr_dir}, creds)
            cmd = f'cd "{curr_dir}"; {cmd}'

        logging.debug(cmd)
        _, stdout, stderr = client.exec_command(cmd)
        # Drain the streams before asking for the exit status: waiting on the
        # status first can hang when the command produces a lot of output.
        out = stdout.read()
        err = stderr.read()
        exit_status = stdout.channel.recv_exit_status()
    finally:
        # always release the connection, including on failure
        client.close()

    # paramiko hands back bytes; callers (and the contract above) expect text
    if isinstance(out, bytes):
        out = out.decode('utf-8', errors='replace')
    if isinstance(err, bytes):
        err = err.decode('utf-8', errors='replace')

    if exit_status != 0:
        raise Exception('Remote command failed with exit status %s: %s\n%s\n%s'
                        % (exit_status, cmd, out, err))

    return out
lists of strings which are folder paths and file paths 151 | """ 152 | files = [] 153 | folders = [] 154 | cmd = 'find %s -type f -name "%s" -exec echo "F: {}" \\; -o -type d -exec echo "D: {}" \\;' % (start_dir, include) 155 | client = paramiko.SSHClient() 156 | client.set_missing_host_key_policy(paramiko.AutoAddPolicy()) 157 | client.connect(**creds.__dict__) 158 | 159 | stdout = client.exec_command(cmd)[1] 160 | output = stdout.read().decode('utf-8') 161 | paths = list(set(output.splitlines())) 162 | if exclude is None or len(exclude) < 1: 163 | for path in paths: 164 | __add_path(path, files, folders) 165 | else: 166 | exclude_pat = '|'.join(exclude).replace('*', '.*') 167 | for path in paths: 168 | path = path.strip() 169 | if not re.match(exclude_pat, path): 170 | __add_path(path, files, folders) 171 | return folders, files 172 | -------------------------------------------------------------------------------- /parallel_sync/rsync.py: -------------------------------------------------------------------------------- 1 | """ 2 | This module copies files in parallel up or down stream 3 | from or to a remote host 4 | """ 5 | import os 6 | import re 7 | import hashlib 8 | import platform 9 | import subprocess 10 | from multiprocessing.pool import ThreadPool 11 | from functools import partial 12 | import logging 13 | from . 
def __transfer(src: str, dst: str, creds: Credential, upstream: bool=True,
               tries: int=1, include: str='*', exclude: list=None, parallelism: int=10, extract: bool=False,
               validate: bool=False, additional_params: str='-c'):
    """
    Collects the files under @src, creates the matching folders under @dst
    and hands the (source, destination) pairs over to __transfer_paths
    @src: str path of a file or folder for source
    @dst: path of a folder for destination
    @creds: ssh credentials
    @upstream: bool, whether it is upload or not (False means download)
    @tries: int, how many times to try
    @include: wild card pattern
    @exclude: list of wild card patterns
    @parallelism(default=10): number of parallel processes to use
    @extract: bool - whether to extract tar or zip files after transfer
    @validate: whether to do a checksum validation at the end
    @additional_params: str - additional parameters to pass on to rsync
    raises ValueError if @src or @dst is None
    """
    if src is None:
        raise ValueError('src cannot be None')

    if dst is None:
        raise ValueError('dst cannot be None')

    # A single local file has no sub-folders to mirror. Binding folder_srcs
    # up front keeps it defined whichever branch below runs.
    folder_srcs = []
    if upstream and os.path.isfile(src):
        srcs = [src]
    elif upstream:  # upload of a folder
        folder_srcs, srcs = executor.find_local(src, include=include, exclude=exclude)
    else:  # download
        folder_srcs, srcs = executor.find_remote(src, creds, include=include, exclude=exclude)

    # mirror every source sub-folder under dst; dst itself is always created
    folder_dsts = {__get_dst_path(src, s, dst) for s in folder_srcs if s != src}
    folder_dsts.add(dst)
    __make_dirs(folder_dsts, creds, upstream)

    if not srcs:
        logging.warning('No source files found to transfer.')
        return

    paths = [(s_path, __get_dst_path(src, s_path, dst)) for s_path in srcs]

    __transfer_paths(paths, creds, upstream,
                     tries=tries, parallelism=parallelism, extract=extract,
                     validate=validate, additional_params=additional_params)
def __get_transfer_commands(creds: Credential, upstream: bool,
                            paths: list, additional_params: str='-c') -> list:
    """
    Builds one shell command per (source, destination) pair
    @paths: list of tuples of (source_path, dest_path)
        note that source_path can be either local or remote
    @creds: ssh Credentials
    @upstream: bool whether it is upload or download
    @additional_params: str. You can pass additional rsync parameters. The default is just '-c'
    returns a list of commands to be run locally
    """
    remote_host = f'{creds.username}@{creds.hostname}'

    # Every ssh setting (identity file, port, -o options) has to live inside
    # the single quoted argument given to rsync's -e. Outside of it rsync
    # parses them itself: "-o" means --owner, the "Key=value" words become
    # extra source paths, and "--port" only applies to rsync daemons.
    ssh_transport = (f'ssh -i "{creds.key_filename}" -p {creds.port} '
                     '-o StrictHostKeyChecking=no -o ServerAliveInterval=100')
    rsync = f"rsync {additional_params} -e '{ssh_transport}'"

    use_rsync = None  # probed at most once, and only if a file has to be copied
    cmds = []
    for src, dst in paths:
        if upstream and os.path.isdir(src):
            # a local folder is only re-created remotely, nothing is copied
            cmd = f'ssh -p {creds.port} {remote_host} -i "{creds.key_filename}" mkdir -p {dst}'
        else:
            if use_rsync is None:
                use_rsync = __is_rsync_installed()

            if use_rsync:
                if upstream:
                    cmd = f'{rsync} "{src}" {remote_host}:"{dst}"'
                else:  # download
                    cmd = f'{rsync} {remote_host}:"{src}" "{dst}"'
            elif upstream:  # no rsync locally, fall back to scp
                cmd = f'scp -P {creds.port} -i "{creds.key_filename}" "{src}" {remote_host}:"{dst}"'
            else:  # download with scp
                cmd = f'scp -P {creds.port} -i "{creds.key_filename}" {remote_host}:"{src}" "{dst}"'

        cmds.append(cmd)
    return cmds
def extract_files(creds, upstream, paths):
    """
    Runs gunzip on every transferred file whose destination path ends with '.gz'
    :param creds: ssh credentials, only used for uploads
    :param upstream: boolean, True when the files were uploaded (the
        destinations are remote), False when they were downloaded
    :param paths: list of tuples of (source_path, dest_path)
    """
    logging.info('File extraction...')
    # only the destination side of each pair is looked at, in both directions
    gunzip_cmds = [f'gunzip "{dest}"' for _, dest in paths if dest.endswith('.gz')]
    if not gunzip_cmds:
        return

    # NOTE(review): remote_batch / local_batch are not among the executor
    # functions visible next to this code (run_remote_batch is) - confirm they exist.
    if upstream:  # local=source, remote=dest
        executor.remote_batch(gunzip_cmds, creds)
    else:  # local=dest, remote=source
        executor.local_batch(gunzip_cmds)
def local_checksum_validator(paths: list):
    """
    Checks that both files of every pair have the same MD5 checksum.
    Both paths of a pair are read from the local file system.
    @paths: list of tuples of (source_path, dest_path)
    raises CheckSumMismatch for the first pair whose checksums differ
    """
    def md5_of(path):
        # hash in chunks so large files are never fully loaded in memory,
        # and close the file as soon as it has been read
        digest = hashlib.md5()
        with open(path, 'rb') as handle:
            for chunk in iter(lambda: handle.read(1024 * 1024), b''):
                digest.update(chunk)
        return digest.hexdigest()

    for src, dst in paths:
        if md5_of(src) != md5_of(dst):
            raise CheckSumMismatch(f'checksum mismatch for\n{src}\n{dst}')
def download(creds: Credential, target_dir: str, urls: list,
             filenames: list=None, parallelism: int=10, tries: int=3,
             extract: bool=False, timeout: int=TIMEOUT):
    """ downloads large files on a remote machine
    @creds: ssh credentials
    @target_dir: where to download to
    @urls: a list of urls or a single url
    @filenames: list of filenames. If used, the urls will be downloaded to
        those file names (one per url, in the same order). If omitted, the
        names are derived from the urls
    @parallelism(default=10): number of parallel processes to use
    @tries: how many times wget tries each url (wget -t)
    @extract: boolean - whether to extract tar or zip files after download
    @timeout: wget timeout in seconds (wget -T)
    raises ValueError if @urls is not a list or a string, or if the number
        of @filenames does not match the number of @urls
    """
    if isinstance(urls, str):
        urls = [urls]

    if not isinstance(urls, list):
        raise ValueError(f'Expected a list of urls. Received {urls}')

    # NOTE(review): this creates target_dir on the LOCAL machine although the
    # wget commands run remotely - kept as is, confirm it is intended.
    os.makedirs(target_dir, exist_ok=True)

    if filenames is None:
        # only fall back to url-derived names when the caller gave none
        filenames = [__url_to_filename(url) for url in urls]
    elif len(filenames) != len(urls):
        raise ValueError('You have specified filenames but the number '\
                         'of filenames does not match the number of urls')

    cmds = []
    for filename, _url in zip(filenames, urls):
        file_path = f'{target_dir}/{filename}'
        cmd = f'wget -O "{file_path}" -t {tries} -T {timeout} "{_url}"'
        # note: don't use the -q option because
        # if it fails, you don't get any message or return code
        if extract:
            ext = compression.get_unzip_cmd(file_path)
            if ext is not None:
                cmd = f'{cmd};cd "{target_dir}";{ext} "{filename}"'
        cmds.append(cmd)

    executor.run_remote_batch(cmds, creds, curr_dir=target_dir, parallelism=parallelism)
import os
import sysconfig

from setuptools import setup, find_packages

# The stdlib sysconfig module replaces the deprecated distutils.sysconfig.
# INCLUDEPY may be unset on some builds, in which case the headers cannot be
# located and the warning is shown as well.
inc_path = sysconfig.get_config_var('INCLUDEPY')
if not inc_path or not os.path.exists(os.path.join(inc_path, 'Python.h')):
    print('WARNING: You must install python headers to install the Paramiko dependency.'
          '\nExample on ubuntu: sudo apt-get install python3-dev'
          '\nExample on centos: sudo yum install python3-devel')

version = '2.0.1'
setup(
    name='parallel_sync',
    version=version,
    url='https://github.com/kouroshparsa/parallel_sync',
    download_url='https://github.com/kouroshparsa/parallel_sync/packages/%s' % version,
    license='GNU',
    author='Kourosh Parsa',
    author_email="kouroshtheking@gmail.com",
    description='A Parallelized file/url syncing package',
    long_description=__doc__,
    packages=find_packages(),
    install_requires=['paramiko>=1.15.2', 'six'],
    python_requires='>=3',
    include_package_data=True,
    zip_safe=False,
    platforms='Linux',
    classifiers=[
        'Operating System :: Unix',
        'Programming Language :: Python'
    ]
)
class TestStringMethods(unittest.TestCase):
    """
    Smoke tests: each one performs a single rsync.upload or rsync.download
    call with the hard-coded credentials and paths below
    """

    @staticmethod
    def _credentials():
        """ returns the ssh credentials shared by all the smoke tests """
        return Credential(username='kourosh', hostname='localhost', port=3022,
                          key_filename='C:/kourosh/virtualbox_ssh_key/id_rsa')

    def test_upload_file(self):
        rsync.upload('c:/temp/test.txt', '/tmp/', creds=self._credentials())

    def test_upload_dir(self):
        rsync.upload('c:/temp/testdir', '/tmp/testdir', creds=self._credentials())

    def test_download_file(self):
        rsync.download('/tmp/test.txt', 'C:/temp/x/', creds=self._credentials())

    def test_download_dir(self):
        rsync.download('/tmp/testdir', 'c:/temp/z', creds=self._credentials())
@patch('parallel_sync.executor.find_local')
@patch('paramiko.SSHClient.connect')
@patch('paramiko.SSHClient.exec_command')
@patch('parallel_sync.rsync.__get_transfer_commands')
def test_upload(mock_tr_cmd, mock_exec_command, mock_connect, mock_find_local):
    """
    Uploading a folder must reach __get_transfer_commands with one
    (source, destination) pair per file reported by find_local
    """
    creds = Credential(username='u', hostname='h', port=3022, key_filename='k')
    # find_local returns a pair: (folder paths, file paths)
    mock_find_local.return_value = ([], ['/src_dir/a', '/src_dir/b'])
    mock_connect.return_value = None
    buffer = MockStdOut()
    mock_exec_command.return_value = [None, buffer, buffer]
    mock_tr_cmd.return_value = []
    rsync.upload('/src_dir', '/dst_dir', creds=creds)
    assert mock_tr_cmd.called
    # the third positional argument is the list of path pairs
    assert mock_tr_cmd.call_args[0][2] == [('/src_dir/a', '/dst_dir/a'),
                                           ('/src_dir/b', '/dst_dir/b')]
@patch('parallel_sync.rsync.__is_rsync_installed')
def test_get_transfer_commands_scp_upstream(mock_is_rsync_installed):
    """ when rsync is reported as missing, uploads use one scp command per path """
    mock_is_rsync_installed.return_value = False
    credentials = Credential(username='u', hostname='h', port=3022, key_filename='k')
    # each tuple is (source path, destination path)
    path_pairs = [('/src/1', '/dst/1'), ('/src/2', '/dst/2')]
    expected = ['scp -P 3022 -i "k" "/src/1" u@h:"/dst/1"',
                'scp -P 3022 -i "k" "/src/2" u@h:"/dst/2"']
    assert rsync.__get_transfer_commands(credentials, True, path_pairs) == expected