├── storage ├── log │ └── .gitkeep └── packages │ └── .gitkeep ├── requirements.txt ├── config └── config.example.yaml ├── CONTRIBUTING.md ├── LICENSE ├── reflector.py ├── README.md ├── .gitignore └── reflector ├── util.py └── __init__.py /storage/log/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /storage/packages/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | lxml >= 3.7.3 2 | beautifulsoup4 >= 4.6.0 3 | pyyaml >= 3.12 4 | requests >= 2.14.2 5 | argparse >= 1.4.0 -------------------------------------------------------------------------------- /config/config.example.yaml: -------------------------------------------------------------------------------- 1 | remote: 2 | url: https://chocolatey.org/ 3 | update_feed: https://feeds.feedburner.com/chocolatey?format=xml # Set to false to disable 4 | json_api: false 5 | 6 | local: 7 | url: http://localhost/ 8 | json_api: false 9 | api_key: null # Fill this in 10 | package_storage_path: storage/packages/ 11 | dotnet_path: false # Example: /usr/local/share/dotnet/dotnet 12 | 13 | hash: 14 | verify_downloads: true 15 | verify_uploaded: true 16 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing 2 | 3 | Thanks for your interest in this project! To contribute: 4 | 5 | 6 | Fork, then clone the repo: 7 | 8 | ```bash 9 | git clone git@github.com:your-username/NuGetReflector.git; 10 | ``` 11 | 12 | Make sure your version of python is version 2 at least version `2.7` preferably `2.7.13` and you have pip. 13 | 14 | Install python libraries: 15 | 16 | ```bash 17 | pip install -r requirements.txt; 18 | ``` 19 | 20 | Install the [DotNet CLI](https://github.com/dotnet/cli), and make note of the `dotnet` binary path. 21 | 22 | Create a new config and edit it: 23 | 24 | ```bash 25 | cp config/config.example.yaml config/config.yaml; 26 | vi config/config.yaml; 27 | ``` 28 | 29 | Config options can be found on the [README](README.md). 30 | 31 | Make your changes... 32 | 33 | Push to your fork and [submit a pull request][pr]. 34 | 35 | [pr]: https://github.com/MelonSmasher/NuGetReflector/compare/ 36 | 37 | At this point you're waiting on me. I'll do my best to review all pull requests when I have the time. 38 | 39 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2017 Alex Markessinis 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. -------------------------------------------------------------------------------- /reflector.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from __future__ import print_function 4 | from reflector import Mirror, Config 5 | import argparse 6 | 7 | 8 | def main(): 9 | parser = argparse.ArgumentParser(description='Synchronize an NuGet mirror from a target repository.') 10 | parser.add_argument('-d', '--delta', action='store_true', 11 | help='Sync the latest packages from the feed url.') 12 | parser.add_argument('-f', '--full', action='store_true', 13 | help='Reconcile the entire local mirror against the remote repo.') 14 | args = parser.parse_args() 15 | 16 | config = Config() 17 | mirror = Mirror( 18 | config.remote_url, 19 | config.update_feed, 20 | config.remote_json_api, 21 | config.local_url, 22 | config.local_json_api, 23 | config.package_storage_path, 24 | config.local_api_key, 25 | config.dotnet_path, 26 | config.hash_verify_downloads, 27 | config.hash_verify_uploaded 28 | ) 29 | 30 | if args.delta: 31 | print('Starting a delta sync') 32 | mirror.delta_sync() 33 | 34 | if args.full: 35 | print('Starting a full sync') 36 | mirror.sync_packages() 37 | 38 | 39 | if __name__ == "__main__": 40 | main() 41 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # NuGetReflector 2 | 3 | This tool mirrors an NuGet repository to another NuGet server. This can be used to clone public repositories or used to make private repositories redundant. 4 | 5 | ### Features: 6 | 7 | - Full Sync - Fully reconcile your mirror against a remote repository. 8 | - Delta Sync - Catch up on newly created packages and new updated packages. Note: this requires an atom feed modeled after the [Chocolatey update feed](https://feeds.feedburner.com/chocolatey?format=xml). 9 | 10 | ### Todo: 11 | 12 | - Improve logging 13 | - Support multiple mirrors 14 | - Threaded syncs... maybe, might be overkill 15 | 16 | --- 17 | 18 | # Install 19 | 20 | ### Needs: 21 | 22 | * Python >= 2.7 (tested on 2.7.13) 23 | * Python < 3 24 | * Pip 25 | * [DotNet CLI](https://github.com/dotnet/cli) 26 | 27 | #### Step 1: 28 | 29 | Install the DotNet CLI. Instructions can be found on [their repository](https://github.com/dotnet/cli). 30 | 31 | #### Step 2: 32 | 33 | Locate the `dotnet` binary and make note of the path. 34 | 35 | On OS X: 36 | 37 | ```bash 38 | which dotnet; 39 | # /usr/local/share/dotnet/dotnet 40 | ``` 41 | 42 | On Linux: 43 | 44 | It depends on your distro or where you extract the tarball. 45 | 46 | On Windows: 47 | 48 | Have not tested on Windows. 49 | 50 | #### Step 3: 51 | 52 | Get the source and configure options. 53 | 54 | ```shell 55 | cd /opt; 56 | git clone https://github.com/MelonSmasher/NuGetReflector.git; 57 | cd NuGetReflector; 58 | cp config/config.example.yaml config/config.yaml; 59 | vi config/config.yaml; # Fill out your settings, see config options below. 60 | pip install -r requirements.txt; 61 | ``` 62 | 63 | # Config options: 64 | 65 | - `remote:` 66 | - `url:` - remote repo to mirror # Default: https://chocolatey.org/ 67 | - `update_feed:` xml feed that supplies updates # Default: https://feeds.feedburner.com/chocolatey?format=xml 68 | - `json_api:` - request json from remote API # Default false 69 | 70 | - `local:` 71 | - `url:` - local repo to host mirror # Default: http://localhost/ 72 | - `json_api:` - request json from local API # Default false 73 | - `api_key:` - local repo api key # Default: null 74 | - `package_storage_path:` - Local path to store packages # Default: storage/packages/ 75 | - `dotnet_path:` - Path to dontnet executable # Default: false # Example: /usr/local/share/dotnet/dotnet 76 | 77 | - `hash:` 78 | - `verify_downloads:` - Verify downloaded package hash. You should leave this enabled # Default: true 79 | - `verify_uploaded:` - Verify package hash after it has been uploaded to the mirror. You should leave this enabled # Default: true 80 | 81 | # Usage: 82 | 83 | ### Full sync: 84 | 85 | Manually: 86 | 87 | ```bash 88 | ./reflector.py -f; 89 | ``` 90 | 91 | Cron Job every 24 hours a 12:05 am: 92 | 93 | ```bash 94 | 5 0 * * * cd /opt/NuGetReflector; python /opt/NuGetReflector/reflector.py --full 1>> /opt/NuGetReflector/storage/log/sync.log 2>> /opt/NuGetReflector/storage/log/error.log 95 | ``` 96 | 97 | ### Incremental "delta" sync: 98 | 99 | Manually: 100 | 101 | ```bash 102 | ./reflector.py -d; 103 | ``` 104 | 105 | Cron Job every 10 minutes: 106 | 107 | ```bash 108 | */10 * * * * cd /opt/NuGetReflector; python /opt/NuGetReflector/reflector.py --delta 1>> /opt/NuGetReflector/storage/log/sync.log 2>> /opt/NuGetReflector/storage/log/error.log 109 | ``` 110 | 111 | # Contributing: 112 | 113 | See the [contribution guidelines](CONTRIBUTING.md). 114 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | 2 | ### Project Template ### 3 | config/config.yaml 4 | storage/packages/*.nupkg 5 | storage/log/*.log 6 | storage/sync.delta 7 | 8 | ### macOS template 9 | *.DS_Store 10 | .AppleDouble 11 | .LSOverride 12 | 13 | # Icon must end with two \r 14 | Icon 15 | 16 | 17 | # Thumbnails 18 | ._* 19 | 20 | # Files that might appear in the root of a volume 21 | .DocumentRevisions-V100 22 | .fseventsd 23 | .Spotlight-V100 24 | .TemporaryItems 25 | .Trashes 26 | .VolumeIcon.icns 27 | .com.apple.timemachine.donotpresent 28 | 29 | # Directories potentially created on remote AFP share 30 | .AppleDB 31 | .AppleDesktop 32 | Network Trash Folder 33 | Temporary Items 34 | .apdisk 35 | ### SublimeText template 36 | # cache files for sublime text 37 | *.tmlanguage.cache 38 | *.tmPreferences.cache 39 | *.stTheme.cache 40 | 41 | # workspace files are user-specific 42 | *.sublime-workspace 43 | 44 | # project files should be checked into the repository, unless a significant 45 | # proportion of contributors will probably not be using SublimeText 46 | # *.sublime-project 47 | 48 | # sftp configuration file 49 | sftp-config.json 50 | 51 | # Package control specific files 52 | Package Control.last-run 53 | Package Control.ca-list 54 | Package Control.ca-bundle 55 | Package Control.system-ca-bundle 56 | Package Control.cache/ 57 | Package Control.ca-certs/ 58 | Package Control.merged-ca-bundle 59 | Package Control.user-ca-bundle 60 | oscrypto-ca-bundle.crt 61 | bh_unicode_properties.cache 62 | 63 | # Sublime-github package stores a github token in this file 64 | # https://packagecontrol.io/packages/sublime-github 65 | GitHub.sublime-settings 66 | ### Windows template 67 | # Windows thumbnail cache files 68 | Thumbs.db 69 | ehthumbs.db 70 | ehthumbs_vista.db 71 | 72 | # Folder config file 73 | Desktop.ini 74 | 75 | # Recycle Bin used on file shares 76 | $RECYCLE.BIN/ 77 | 78 | # Windows Installer files 79 | *.cab 80 | *.msi 81 | *.msm 82 | *.msp 83 | 84 | # Windows shortcuts 85 | *.lnk 86 | ### Linux template 87 | *~ 88 | 89 | # temporary files which can be created if a process still has a handle open of a deleted file 90 | .fuse_hidden* 91 | 92 | # KDE directory preferences 93 | .directory 94 | 95 | # Linux trash folder which might appear on any partition or disk 96 | .Trash-* 97 | 98 | # .nfs files are created when an open file is removed but is still being accessed 99 | .nfs* 100 | ### JetBrains template 101 | # Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio and Webstorm 102 | # Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839 103 | 104 | .idea/ 105 | 106 | ## File-based project format: 107 | *.iws 108 | 109 | ## Plugin-specific files: 110 | 111 | # IntelliJ 112 | /out/ 113 | 114 | # mpeltonen/sbt-idea plugin 115 | .idea_modules/ 116 | 117 | # JIRA plugin 118 | atlassian-ide-plugin.xml 119 | 120 | # Crashlytics plugin (for Android Studio and IntelliJ) 121 | com_crashlytics_export_strings.xml 122 | crashlytics.properties 123 | crashlytics-build.properties 124 | fabric.properties 125 | ### Vim template 126 | # swap 127 | [._]*.s[a-v][a-z] 128 | [._]*.sw[a-p] 129 | [._]s[a-v][a-z] 130 | [._]sw[a-p] 131 | # session 132 | Session.vim 133 | # temporary 134 | .netrwhist 135 | # auto-generated tag files 136 | tags 137 | ### Python template 138 | # Byte-compiled / optimized / DLL files 139 | __pycache__/ 140 | *.py[cod] 141 | *$py.class 142 | 143 | # C extensions 144 | *.so 145 | 146 | # Distribution / packaging 147 | .Python 148 | env/ 149 | build/ 150 | develop-eggs/ 151 | dist/ 152 | downloads/ 153 | eggs/ 154 | .eggs/ 155 | lib/ 156 | lib64/ 157 | parts/ 158 | sdist/ 159 | var/ 160 | wheels/ 161 | *.egg-info/ 162 | .installed.cfg 163 | *.egg 164 | 165 | # PyInstaller 166 | # Usually these files are written by a python script from a template 167 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 168 | *.manifest 169 | *.spec 170 | 171 | # Installer logs 172 | pip-log.txt 173 | pip-delete-this-directory.txt 174 | 175 | # Unit test / coverage reports 176 | htmlcov/ 177 | .tox/ 178 | .coverage 179 | .coverage.* 180 | .cache 181 | nosetests.xml 182 | coverage.xml 183 | *,cover 184 | .hypothesis/ 185 | 186 | # Translations 187 | *.mo 188 | *.pot 189 | 190 | # Django stuff: 191 | *.log 192 | local_settings.py 193 | 194 | # Flask stuff: 195 | instance/ 196 | .webassets-cache 197 | 198 | # Scrapy stuff: 199 | .scrapy 200 | 201 | # Sphinx documentation 202 | docs/_build/ 203 | 204 | # PyBuilder 205 | target/ 206 | 207 | # Jupyter Notebook 208 | .ipynb_checkpoints 209 | 210 | # pyenv 211 | .python-version 212 | 213 | # celery beat schedule file 214 | celerybeat-schedule 215 | 216 | # SageMath parsed files 217 | *.sage.py 218 | 219 | # dotenv 220 | .env 221 | 222 | # virtualenv 223 | .venv 224 | venv/ 225 | ENV/ 226 | 227 | # Spyder project settings 228 | .spyderproject 229 | 230 | # Rope project settings 231 | .ropeproject 232 | -------------------------------------------------------------------------------- /reflector/util.py: -------------------------------------------------------------------------------- 1 | from bs4 import BeautifulSoup 2 | from requests import get, put, exceptions 3 | from subprocess import call 4 | import hashlib 5 | import base64 6 | import sys 7 | import os 8 | import time 9 | import datetime 10 | 11 | 12 | def _pull(url, json=False): 13 | """ 14 | :param url: 15 | :param json: 16 | :return: 17 | """ 18 | tries = 0 19 | while tries < 3: 20 | if json: 21 | try: 22 | response = get(url, headers={'Accept': 'application/json'}, timeout=5.501) 23 | if response.status_code == 200: 24 | return response 25 | elif response.status_code == 404: 26 | print('Received a NOT FOUND response') 27 | print(str(response.status_code) + ' / ' + response.reason) 28 | print(url) 29 | return response 30 | except exceptions.Timeout: 31 | print('Timed out when trying to pull...') 32 | except exceptions.ConnectionError: 33 | print('Timed out when trying to pull...') 34 | except Exception as e: 35 | print('Ran into a general error when trying to pull...') 36 | print(e.message) 37 | print(e) 38 | else: 39 | try: 40 | response = get(url, timeout=5.501) 41 | if response.status_code == 200: 42 | response.objectified = BeautifulSoup(response.content, 'xml') 43 | return response 44 | elif response.status_code == 404: 45 | print('Received a NOT FOUND response') 46 | print(str(response.status_code) + ' / ' + response.reason) 47 | print(url) 48 | return response 49 | except exceptions.Timeout: 50 | print('Timed out when trying to pull...') 51 | except exceptions.ConnectionError: 52 | print('Timed out when trying to pull...') 53 | except Exception as e: 54 | print('Ran into a general error when trying to pull...') 55 | print(e.message) 56 | print(e) 57 | print('Received an undefined response') 58 | print(str(response.status_code) + ' / ' + response.reason) 59 | tries += 1 60 | print('Sleeping for 10 then trying again...') 61 | time.sleep(10) 62 | return False 63 | 64 | 65 | def pull_package(title, version, url, json=False): 66 | """ 67 | :param title: 68 | :param version: 69 | :param url: 70 | :param json: 71 | :return: 72 | """ 73 | return _pull(''.join([url, '(Id=\'', title, '\',Version=\'', version, '\')']), json=json) 74 | 75 | 76 | def pull_packages(url, json=False): 77 | """ 78 | :param url: 79 | :param json: 80 | :return: 81 | """ 82 | return _pull(url, json=json) 83 | 84 | 85 | def pull_updates(url): 86 | """ 87 | :param url: 88 | :return: 89 | """ 90 | return _pull(url, json=False) 91 | 92 | 93 | def push_package_dotnet(package_path, repo_url, api_key, dotnet): 94 | """ 95 | :param package_path: 96 | :param repo_url: 97 | :param api_key: 98 | :param dotnet: 99 | :return: 100 | """ 101 | cmd = ' '.join([dotnet, 'nuget', 'push', package_path, '-s', repo_url, '-k', api_key]) 102 | return call(cmd, shell=True) 103 | 104 | 105 | def push_package_native(package_path, repo_url, api_key): 106 | """ 107 | :param package_path: 108 | :param repo_url: 109 | :param api_key: 110 | :return: 111 | """ 112 | f = open(package_path, mode='rb') 113 | files = {'package': ('package', f, 'application/octet-stream')} 114 | headers = {'X-NuGet-ApiKey': api_key} 115 | return put(repo_url, files=files, headers=headers) 116 | 117 | 118 | def download_file(url, save_to): 119 | """ 120 | :param url: 121 | :param save_to: 122 | :return: 123 | """ 124 | count = 0 125 | sys.stdout.write('Downloading.') 126 | sys.stdout.flush() 127 | # Does the file already exist? 128 | if os.path.isfile(save_to): 129 | # If the file exists remove it since we forced 130 | os.remove(save_to) 131 | # Get the file and stream it to the disk 132 | r = get(url, stream=True) 133 | with open(save_to, 'wb') as f: 134 | for chunk in r.iter_content(chunk_size=1024): 135 | if chunk: 136 | # Count the chunks 137 | count += 1 138 | if count >= 1024: 139 | # Write a dot every 1024 chunks 140 | sys.stdout.write('.') 141 | sys.stdout.flush() 142 | count = 0 143 | # Write to the file when the chunk gets to 1024 144 | f.write(chunk) 145 | f.flush() 146 | print(' Done!') 147 | return True if os.path.isfile(save_to) else False 148 | 149 | 150 | def sha512sum(file_path, block_size=65536): 151 | """ 152 | :param file_path: 153 | :param block_size: 154 | :return: 155 | """ 156 | return base64.encodestring( 157 | __hash_byte_str_iter(__file_as_block_iter(open(file_path, 'rb'), block_size), hashlib.sha512(), False) 158 | ).replace("\n", '') 159 | 160 | 161 | def sha256sum(file_path, block_size=65536): 162 | """ 163 | :param file_path: 164 | :param block_size: 165 | :return: 166 | """ 167 | return base64.encodestring( 168 | __hash_byte_str_iter(__file_as_block_iter(open(file_path, 'rb'), block_size), hashlib.sha256(), False) 169 | ).replace("\n", '') 170 | 171 | 172 | def sha1sum(file_path, block_size=65536): 173 | """ 174 | :param file_path: 175 | :param block_size: 176 | :return: 177 | """ 178 | return base64.encodestring( 179 | __hash_byte_str_iter(__file_as_block_iter(open(file_path, 'rb'), block_size), hashlib.sha1, False) 180 | ).replace("\n", '') 181 | 182 | 183 | def hashes_match(hash_1, hash_2): 184 | """ 185 | :param hash_1: 186 | :param hash_2: 187 | :return: 188 | """ 189 | if str(hash_1) == str(hash_2): 190 | return True 191 | else: 192 | return False 193 | 194 | 195 | def verify_hash(file_path, target_hash, message='Verifying package hash.', hash_method='sha512'): 196 | """ 197 | :param file_path: 198 | :param target_hash: 199 | :param message: 200 | :param hash_method: 201 | :return: 202 | """ 203 | sys.stdout.write(message) 204 | sys.stdout.flush() 205 | hash_method.lower() 206 | 207 | if hash_method == 'sha512': 208 | sys.stdout.write('.') 209 | local_hash = sha512sum(file_path) 210 | sys.stdout.write('.') 211 | elif hash_method == 'sha256': 212 | local_hash = sha256sum(file_path) 213 | sys.stdout.write('.') 214 | elif hash_method == 'sha1': 215 | sys.stdout.write('.') 216 | local_hash = sha1sum(file_path) 217 | else: 218 | sys.stdout.write('.') 219 | local_hash = sha512sum(file_path) 220 | 221 | print('. Done!') 222 | return hashes_match(local_hash, target_hash) 223 | 224 | 225 | def utc_to_epoch(time_stamp, time_format='%Y-%m-%dT%H:%M:%SZ'): 226 | """ 227 | :param time_stamp: 228 | :param time_format: 229 | :return: 230 | """ 231 | return int(time.mktime(time.strptime(time_stamp, time_format))) 232 | 233 | 234 | def epoch_to_utc(epoch=0, time_format='%Y-%m-%dT%H:%M:%SZ'): 235 | """ 236 | :param epoch: 237 | :param time_format: 238 | :return: 239 | """ 240 | return time.strftime(time_format, time.gmtime(epoch)) 241 | 242 | 243 | def now_as_epoch(): 244 | """ 245 | :return: 246 | """ 247 | return int(time.time()) 248 | 249 | 250 | def first_epoch(): 251 | """ 252 | :return: 253 | """ 254 | return int(0) 255 | 256 | 257 | def now_as_utc(time_format='%Y-%m-%dT%H:%M:%SZ'): 258 | """ 259 | :param time_format: 260 | :return: 261 | """ 262 | return datetime.datetime.now().strftime(time_format) 263 | 264 | 265 | def touch(file_path): 266 | """ 267 | :param file_path: 268 | :return: 269 | """ 270 | with open(file_path, 'a'): 271 | os.utime(file_path, None) 272 | 273 | 274 | def store_delta(delta, file_path='storage/sync.delta'): 275 | """ 276 | :param delta: 277 | :param file_path: 278 | :return: 279 | """ 280 | touch(file_path) 281 | with open(file_path, 'w') as f: 282 | f.write(''.join([delta, "\n"])) 283 | 284 | 285 | def read_delta(file_path='storage/sync.delta'): 286 | """ 287 | :param file_path: 288 | :return: 289 | """ 290 | touch(file_path) 291 | try: 292 | with open(file_path, 'rb') as f: 293 | f.seek(-21, 2) 294 | return str(f.readlines()[-1].decode().rstrip("\n")) 295 | except IOError: 296 | return None 297 | 298 | 299 | def __hash_byte_str_iter(bytes_iter, hasher, as_hex_str=False): 300 | """ 301 | :param bytes_iter: 302 | :param hasher: 303 | :param as_hex_str: 304 | :return: 305 | """ 306 | for block in bytes_iter: 307 | hasher.update(block) 308 | return hasher.hexdigest() if as_hex_str else hasher.digest() 309 | 310 | 311 | def __file_as_block_iter(f, block_size=65536): 312 | """ 313 | :param f: 314 | :param block_size: 315 | :return: 316 | """ 317 | with f: 318 | block = f.read(block_size) 319 | while len(block) > 0: 320 | yield block 321 | block = f.read(block_size) 322 | -------------------------------------------------------------------------------- /reflector/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | from os.path import isfile, exists 3 | from os import mknod, remove 4 | from time import sleep 5 | from yaml import load 6 | from reflector.util import * 7 | 8 | KEY_TITLE = {'xml': 'title', 'json': 'Id'} 9 | KEY_CONTENT = 'content' 10 | KEY_SRC = 'src' 11 | KEY_REL = 'rel' 12 | KEY_HREF = 'href' 13 | VALUE_NEXT = {'xml': 'next', 'json': '__next'} 14 | 15 | 16 | class Config(object): 17 | def __init__(self): 18 | with open('config/config.yaml') as data_file: 19 | c = load(data_file) 20 | self.remote_url = c['remote']['url'].rstrip('/') 21 | self.update_feed = c['remote']['update_feed'] 22 | self.remote_json_api = c['remote']['json_api'] 23 | self.local_url = c['local']['url'].rstrip('/') 24 | self.local_json_api = c['local']['json_api'] 25 | self.local_api_key = c['local']['api_key'] 26 | self.package_storage_path = c['local']['package_storage_path'].rstrip('/').rstrip('\\') 27 | self.hash_verify_downloads = c['hash']['verify_downloads'] 28 | self.hash_verify_uploaded = c['hash']['verify_uploaded'] 29 | self.dotnet_path = c['local']['dotnet_path'] 30 | if not self.dotnet_path or not isfile(self.dotnet_path): 31 | raise EnvironmentError('DotNot CLI executable is not configured or the path specified does not exist!') 32 | 33 | 34 | class Mirror(object): 35 | def __init__(self, 36 | remote_url, 37 | update_feed, 38 | remote_json_api, 39 | local_url, 40 | local_json_api, 41 | package_storage_path, 42 | local_api_key, 43 | dotnet_path, 44 | verify_downloads=True, 45 | verify_uploaded=True 46 | ): 47 | """ 48 | :param remote_url: 49 | :param update_feed: 50 | :param remote_json_api: 51 | :param local_url: 52 | :param local_json_api: 53 | :param package_storage_path: 54 | :param local_api_key: 55 | :param dotnet_path: 56 | :param verify_downloads: 57 | :param verify_uploaded: 58 | """ 59 | self.remote_api_url = '/'.join([remote_url, 'api/v2']) 60 | self.update_feed = update_feed 61 | self.remote_json_api = remote_json_api 62 | self.remote_packages_url = '/'.join([self.remote_api_url, 'Packages']) 63 | self.local_api_url = '/'.join([local_url, 'api/v2']) 64 | self.local_json_api = local_json_api 65 | self.local_api_upload_url = '/'.join([self.local_api_url, 'package']) 66 | self.local_packages_url = '/'.join([self.local_api_url, 'Packages']) 67 | self.package_storage_path = package_storage_path 68 | self.local_api_key = local_api_key 69 | self.dotnet_path = dotnet_path 70 | self.verify_downloads = verify_downloads 71 | self.verify_uploaded = verify_uploaded 72 | 73 | def __sync(self, content_url, save_to, package_name, version, 74 | source_hash=None, 75 | source_hash_method=None, 76 | dl_reties=0, 77 | up_retries=0, 78 | force_dl=False, 79 | force_up=False 80 | ): 81 | """ 82 | :param content_url: 83 | :param save_to: 84 | :param package_name: 85 | :param version: 86 | :param source_hash: 87 | :param source_hash_method: 88 | :param dl_reties: 89 | :param up_retries: 90 | :param force_dl: 91 | :param force_up: 92 | :return: 93 | """ 94 | # Is the package already uploaded? Pull it from the target API 95 | pull_request = pull_package(package_name, version, self.local_packages_url, self.local_json_api) 96 | 97 | if not pull_request.status_code: 98 | print('Unable to sync package.') 99 | return False 100 | 101 | # What did the target api return 102 | if pull_request.status_code == 404 or pull_request.status_code == 200 or force_dl or not isfile(save_to): 103 | # Download the file if we are forcing or it was not already uploaded or cached 104 | if pull_request.status_code == 404 or not isfile(save_to) or force_dl: 105 | if not download_file(content_url, save_to): 106 | print('Package does not exists after download!?!') 107 | return False 108 | 109 | # Did we get a source hash when this was called 110 | if source_hash is not None and self.verify_downloads: 111 | # Verify the cached package hash 112 | hash_verified = verify_hash(save_to, source_hash, hash_method=source_hash_method) 113 | # If the hash is not verified and we have retired less than 3 times 114 | if not hash_verified and dl_reties < 3: 115 | # Count a retry 116 | dl_reties += 1 117 | print('Cache hash does not match source hash... retying download...') 118 | # Run another sync 119 | return self.__sync( 120 | content_url, 121 | save_to, 122 | package_name, 123 | version, 124 | source_hash=source_hash, 125 | source_hash_method=source_hash_method, 126 | dl_reties=dl_reties, 127 | up_retries=up_retries, 128 | force_dl=True, 129 | force_up=force_up 130 | ) 131 | elif not hash_verified and dl_reties >= 3: 132 | # Reached max retries 133 | print('Retried to download the package 3 times. Skipping :( ') 134 | return False 135 | else: 136 | print('Skipping cache hash verification...') 137 | else: 138 | # API Error 139 | print(''.join(['API error! Code: ', str(pull_request.status_code)])) 140 | return False 141 | 142 | # Made it here? Cache hash either verified or skipped verification 143 | if pull_request.status_code == 404 or pull_request.status_code == 200 or force_up: 144 | if pull_request.status_code == 404 or force_up: 145 | # Send the package up using the dotnet binary 146 | return_code = push_package_dotnet( 147 | save_to, 148 | self.local_api_upload_url, 149 | self.local_api_key, 150 | self.dotnet_path 151 | ) 152 | 153 | if return_code is not 0: 154 | print('Push failed, retying with native library.') 155 | print('Uploading package...') 156 | # If the dotnet binary does not return 0 try to use a python library 157 | push_response = push_package_native( 158 | save_to, 159 | self.local_api_upload_url, 160 | self.local_api_key 161 | ) 162 | print(''.join(['Response code: ', str(push_response.status_code)])) 163 | if push_response.status_code is not 200: 164 | print('Upload failed... :-(') 165 | print(''.join(['Response message: ', str(push_response.content)])) 166 | 167 | else: 168 | # API Error 169 | # This should never happen. It should get caught above 170 | print(''.join(['API error! Code: ', str(pull_request.status_code)])) 171 | return False 172 | 173 | # If we have a source hash Start verifying it 174 | if source_hash is not None and self.verify_uploaded: 175 | use_target_json = self.local_json_api 176 | # Pull the package after uploading it 177 | pull_request = pull_package(package_name, version, self.local_packages_url, use_target_json) 178 | # Did we find the package 179 | if pull_request.status_code == 200: 180 | # Get the hash 181 | if use_target_json: 182 | # If we are using a json api get it this way 183 | target_hash = pull_request.json()['d']['PackageHash'] 184 | else: 185 | # If we use the XML api get it this way 186 | target_hash = pull_request.objectified.properties.PackageHash.text 187 | 188 | # Does the source hash match the target repo hash? 189 | if hashes_match(target_hash, source_hash): 190 | print('Package synced and verified!') 191 | return True 192 | else: 193 | print('Package synced but checksum do not match!') 194 | return False 195 | 196 | elif up_retries <= 3: 197 | up_retries += 1 198 | print(''.join(['API error! Code: ', str(pull_request.status_code)])) 199 | print('Package not synced retrying...') 200 | return self.__sync( 201 | content_url, 202 | save_to, 203 | package_name, 204 | version, 205 | source_hash=source_hash, 206 | source_hash_method=source_hash_method, 207 | dl_reties=dl_reties, 208 | up_retries=up_retries, 209 | force_dl=False, 210 | force_up=True 211 | ) 212 | 213 | elif up_retries >= 3: 214 | print('Max upload retries reached. Skipping :-( ') 215 | print(''.join(['API error! Code: ', str(pull_request.status_code)])) 216 | return False 217 | 218 | else: 219 | # API Error 220 | print(''.join(['API error! Code: ', str(pull_request.status_code)])) 221 | return False 222 | else: 223 | print('Package synced!') 224 | return True 225 | 226 | def sync_package(self, package): 227 | """ 228 | :param package: 229 | :return: 230 | """ 231 | # Extract the info that we need from the package entry 232 | # Dict keys vary depending if the page was pulled in XML or JSON 233 | use_remote_json = self.remote_json_api 234 | package_name = str(package[KEY_TITLE['json']]) if use_remote_json else str(package.title.text) 235 | version = str(package['Version']) if use_remote_json else str(package.properties.Version.text) 236 | metadata = package['__metadata'] if use_remote_json else {} 237 | content_url = metadata['media_src'] if use_remote_json else package.content['src'] 238 | package_n_v = '.'.join([package_name, version]) 239 | save_to = os.path.join(self.package_storage_path, '.'.join([package_n_v, 'nupkg'])) 240 | if use_remote_json: 241 | remote_hash = str(package['PackageHash']) 242 | remote_hash_method = str(package['PackageHashAlgorithm']).lower() 243 | else: 244 | remote_hash = str(package.properties.PackageHash.text) 245 | remote_hash_method = str(package.properties.PackageHashAlgorithm.text).lower() 246 | 247 | # Begin package sync 248 | print('') 249 | print(''.join(['########## ', package_n_v, ' ##########'])) 250 | 251 | sync = self.__sync(content_url, save_to, package_name, version, source_hash=remote_hash, 252 | source_hash_method=remote_hash_method) 253 | 254 | print('Done!') 255 | return sync 256 | 257 | def delta_sync(self): 258 | url = self.update_feed 259 | previous_delta = read_delta() 260 | new_delta = None 261 | lock_file = '/tmp/reflector_full.lock' 262 | delta_lock_file = '/tmp/reflector_delta.lock' 263 | if not exists(lock_file) and not exists(delta_lock_file): 264 | # Create the lock file 265 | mknod(delta_lock_file) 266 | 267 | if previous_delta is not None: 268 | print(' '.join(['Syncing packages since:', previous_delta])) 269 | previous_delta = utc_to_epoch(previous_delta) 270 | else: 271 | print('No previous delta syncs. Syncing all updates!') 272 | previous_delta = first_epoch() 273 | 274 | # Grab the update feed 275 | response = pull_updates(url) 276 | # Did the request go well? 277 | if response.status_code == 200: 278 | # Get the page 279 | page = response.objectified 280 | # Get all items 281 | items = page.find_all('item') 282 | # Loop over the items 283 | for item in items: 284 | # Get when this was updated 285 | updated = utc_to_epoch(item.updated.text) 286 | # Get the new delta on the first package 287 | if new_delta is None: 288 | new_delta = item.updated.text 289 | # determine if it has been updated since the last run 290 | if updated > previous_delta: 291 | # Grab the package info 292 | parts = str(item.origLink.text).split('/') 293 | version = parts[-1] 294 | title = parts[-2] 295 | # Sync the package 296 | pull_response = pull_package(title, version, self.remote_packages_url, self.remote_json_api) 297 | if pull_response.status_code == 200: 298 | package = pull_response.json() if self.remote_json_api else pull_response.objectified 299 | self.sync_package(package) 300 | else: 301 | print('Received bad http code from remote API when pulling package. Response Code: ' + str( 302 | pull_response.status_code)) 303 | 304 | # If the new delta is still not set. Set it to the previous one 305 | if new_delta is None: 306 | new_delta = epoch_to_utc(previous_delta) 307 | # write epoch to the delta file 308 | store_delta(new_delta) 309 | # remove the lock file 310 | remove(delta_lock_file) 311 | else: 312 | print('Received bad http code from remote API. Response Code: ' + str(response.status_code)) 313 | return False 314 | else: 315 | print('Lock file exists, is there a sync session running?') 316 | return True 317 | 318 | def sync_packages(self): 319 | """ 320 | :return: 321 | """ 322 | done = False 323 | cool_down_counter = 250 324 | url = self.remote_packages_url 325 | use_remote_json = self.remote_json_api 326 | lock_file = '/tmp/reflector_full.lock' 327 | 328 | if not exists(lock_file): 329 | # Create the lock file 330 | mknod(lock_file) 331 | while not done: 332 | print('Pulling packages from: ' + url) 333 | # pull packages from the remote api 334 | response = pull_packages(url, json=use_remote_json) 335 | if response: 336 | # was the response good? 337 | if response.status_code == 200: 338 | if use_remote_json: 339 | # Handle JSON pages 340 | page = response.json() 341 | # data object 342 | data = page['d'] 343 | # Grab the results 344 | results = data['results'] if 'results' in data else [] 345 | # For each result 346 | for package in results: 347 | # sync it! 348 | self.sync_package(package) 349 | # If we have a next key continue to the next page 350 | if VALUE_NEXT['json'] in data: 351 | # Set the url 352 | url = data[VALUE_NEXT['json']] 353 | else: 354 | # Break out 355 | done = True 356 | else: 357 | # Handle XML pages 358 | page = response.objectified 359 | entries = page.find_all('entry') 360 | # Whats the size of the entry list 361 | if len(entries) > 0: 362 | for package in entries: 363 | # sync it! 364 | self.sync_package(package) 365 | 366 | links = page.find_all('link') 367 | # Get the last link on the page 368 | link = links[0] if 0 > (len(links) - 1) else links[(len(links) - 1)] 369 | # If the last link is the next link set it's url as the target url for the next iteration 370 | if link[KEY_REL] == VALUE_NEXT['xml']: 371 | url = str(link['href']) 372 | print(' ') 373 | cool_down_counter -= 1 374 | if cool_down_counter == 1: 375 | # Cool down for 5 seconds every 250 pages... 376 | print('Cooling down for 5 seconds...') 377 | print(' ') 378 | sleep(30) 379 | cool_down_counter = 250 380 | else: 381 | print(' ') 382 | print('Done!') 383 | # Remove the lock file 384 | remove(lock_file) 385 | # Break out 386 | done = True 387 | else: 388 | print( 389 | 'Received bad http code from remote API. Sleeping for 10 and trying again. Response Code: ' + str( 390 | response.status_code)) 391 | sleep(10) 392 | else: 393 | print('Timed out when pulling package list... sleeping for 25 then trying again.') 394 | sleep(25) 395 | else: 396 | print('Lock file exists, is there a sync session running?') 397 | return True 398 | --------------------------------------------------------------------------------