├── .gitignore ├── README.md ├── test-case.py └── docker_pull.py /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | *.pyo 3 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # docker-pull-py 2 | 3 | Pull a docker image by python script, without docker client. 4 | 5 | This repository fork from https://github.com/NotGlop/docker-drag. 6 | And add a feature that it can pull image with interruption resuming. 7 | 8 | 9 | 通过 Python 来拉取 docker image。同时支持断点续传功能。 10 | -------------------------------------------------------------------------------- /test-case.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # encoding: utf-8 3 | 4 | import unittest 5 | import docker_pull 6 | 7 | 8 | class ParseImageTest(unittest.TestCase): 9 | def test_parse_image1(self): 10 | # 官方镜像 11 | ret = docker_pull.parse_image('node') 12 | self.assertEqual(ret['repo'], 'library') 13 | self.assertEqual(ret['tag'], 'latest') 14 | self.assertEqual(ret['registry'], 'registry-1.docker.io') 15 | self.assertEqual(ret['repository'], 'library/node') 16 | 17 | ret = docker_pull.parse_image('node:10-alpine') 18 | self.assertEqual(ret['repo'], 'library') 19 | self.assertEqual(ret['tag'], '10-alpine') 20 | self.assertEqual(ret['registry'], 'registry-1.docker.io') 21 | self.assertEqual(ret['repository'], 'library/node') 22 | 23 | def test_parse_image2(self): 24 | # 用户的镜像 25 | ret = docker_pull.parse_image('user/image') 26 | self.assertEqual(ret['repo'], 'user') 27 | self.assertEqual(ret['tag'], 'latest') 28 | self.assertEqual(ret['registry'], 'registry-1.docker.io') 29 | self.assertEqual(ret['repository'], 'user/image') 30 | 31 | ret = docker_pull.parse_image('user/image:tag') 32 | self.assertEqual(ret['repo'], 'user') 33 | self.assertEqual(ret['tag'], 'tag') 34 | self.assertEqual(ret['registry'], 'registry-1.docker.io') 35 | self.assertEqual(ret['repository'], 'user/image') 36 | 37 | def test_parse_image3(self): 38 | # 第三方仓库 39 | ret = docker_pull.parse_image('mcr.microsoft.com/windows/servercore') 40 | self.assertEqual(ret['repo'], 'windows') 41 | self.assertEqual(ret['tag'], 'latest') 42 | self.assertEqual(ret['registry'], 'mcr.microsoft.com') 43 | self.assertEqual(ret['repository'], 'windows/servercore') 44 | 45 | ret = docker_pull.parse_image('mcr.microsoft.com/windows/servercore:ltsc2016') 46 | self.assertEqual(ret['repo'], 'windows') 47 | self.assertEqual(ret['tag'], 'ltsc2016') 48 | self.assertEqual(ret['registry'], 'mcr.microsoft.com') 49 | self.assertEqual(ret['repository'], 'windows/servercore') 50 | 51 | def test_parse_image4(self): 52 | ret = docker_pull.parse_image('node@sha256:075012d2072be942e17da73a35278be89707266010fb6977bfc43dae5d492ab4') 53 | self.assertEqual(ret['repo'], 'library') 54 | self.assertEqual(ret['tag'], 'sha256:075012d2072be942e17da73a35278be89707266010fb6977bfc43dae5d492ab4') 55 | self.assertEqual(ret['registry'], 'registry-1.docker.io') 56 | self.assertEqual(ret['repository'], 'library/node') 57 | 58 | 59 | if __name__ == '__main__': 60 | unittest.main() 61 | -------------------------------------------------------------------------------- /docker_pull.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # encoding: utf-8 3 | 4 | import os 5 | import sys 6 | import gzip 7 | from io import BytesIO 8 | import json 9 | import hashlib 10 | import shutil 11 | import requests 12 | import tarfile 13 | import urllib3 14 | import copy 15 | urllib3.disable_warnings() 16 | 17 | 18 | def mkdir(path): 19 | if (not os.path.exists(path)): 20 | os.mkdir(path) 21 | 22 | 23 | def parse_image(image_name): 24 | """ 25 | 解析 image name 26 | """ 27 | repo = 'library' 28 | tag = 'latest' 29 | imgparts = image_name.split('/') 30 | try: 31 | img, tag = imgparts[-1].split('@') 32 | except ValueError: 33 | try: 34 | img, tag = imgparts[-1].split(':') 35 | except ValueError: 36 | img = imgparts[-1] 37 | # Docker client doesn't seem to consider the first element as a potential registry unless there is a '.' or ':' 38 | if len(imgparts) > 1 and ('.' in imgparts[0] or ':' in imgparts[0]): 39 | # 第三方仓库 40 | registry = imgparts[0] 41 | repo = '/'.join(imgparts[1:-1]) 42 | slug = '%s/%s/%s' % (registry, repo, img) 43 | else: 44 | registry = 'registry-1.docker.io' 45 | if len(imgparts[:-1]) != 0: 46 | repo = '/'.join(imgparts[:-1]) 47 | slug = '%s/%s' % (repo, img) 48 | else: 49 | repo = 'library' 50 | slug = '%s' % (img) 51 | repository = '{}/{}'.format(repo, img) 52 | return { 53 | 'tag': tag, 'repo': repo, 54 | 'repository': repository, 55 | 'registry': registry, 56 | 'img': img, 'slug': slug 57 | } 58 | 59 | 60 | def get_auth_url(docker_image): 61 | auth_url = 'https://auth.docker.io/token' 62 | reg_service = 'registry.docker.io' 63 | resp = requests.get('https://{}/v2/'.format(docker_image['registry']), verify=False) 64 | if resp.status_code == 401: 65 | auth_url = resp.headers['WWW-Authenticate'].split('"')[1] 66 | try: 67 | reg_service = resp.headers['WWW-Authenticate'].split('"')[3] 68 | except IndexError: 69 | reg_service = "" 70 | return {'auth_url': auth_url, 'reg_service': reg_service} 71 | 72 | 73 | def get_auth_head(docker_image, auth, scope): 74 | resp = requests.get( 75 | '{}?service={}&scope=repository:{}:pull'.format( 76 | auth['auth_url'], auth['reg_service'], docker_image['repository'] 77 | ), verify=False 78 | ) 79 | access_token = resp.json()['token'] 80 | auth_head = {'Authorization':'Bearer '+ access_token, 'Accept': scope} 81 | return auth_head 82 | 83 | 84 | # ======================== manifest start ==================================== 85 | def dump_manifests(manifests): 86 | for manifest in manifests: 87 | for key, value in manifest["platform"].items(): 88 | sys.stdout.write('{}: {}, '.format(key, value)) 89 | print('digest: {}'.format(manifest["digest"])) 90 | 91 | 92 | def fetch_manifest_list(docker_image, auth): 93 | auth_head = get_auth_head( 94 | docker_image, auth, 95 | 'application/vnd.docker.distribution.manifest.list.v2+json' 96 | ) 97 | resp = requests.get( 98 | 'https://{}/v2/{}/manifests/{}'.format( 99 | docker_image['registry'], docker_image['repository'], docker_image['tag'] 100 | ), 101 | headers=auth_head, verify=False 102 | ) 103 | if (resp.status_code != 200): 104 | print(resp.content) 105 | raise Exception('fetch_manifest_list') 106 | 107 | manifests = resp.json()['manifests'] 108 | return manifests 109 | 110 | 111 | def fetch_manifest(docker_image, auth): 112 | auth_head = get_auth_head( 113 | docker_image, auth, 114 | 'application/vnd.docker.distribution.manifest.v2+json' 115 | ) 116 | resp = requests.get( 117 | 'https://{}/v2/{}/manifests/{}'.format( 118 | docker_image['registry'], docker_image['repository'], docker_image['tag'] 119 | ), 120 | headers=auth_head, verify=False 121 | ) 122 | if (resp.status_code != 200): 123 | print('[-] Cannot fetch manifest for {} [HTTP {}]'.format(docker_image['repository'], resp.status_code)) 124 | print(resp.content) 125 | 126 | dump_manifests(fetch_manifest_list(docker_image, auth)) 127 | raise Exception('fetch_manifest') 128 | 129 | return resp.json() 130 | # ======================== manifest end ==================================== 131 | 132 | 133 | # ======================== Layers start ==================================== 134 | def fetch_blob(docker_image, auth, manifest): 135 | auth_head = get_auth_head( 136 | docker_image, auth, 137 | 'application/vnd.docker.distribution.manifest.v2+json' 138 | ) 139 | resp = requests.get( 140 | 'https://{}/v2/{}/blobs/{}'.format( 141 | docker_image['registry'], docker_image['repository'], manifest['config']['digest'] 142 | ), 143 | headers=auth_head, verify=False 144 | ) 145 | return resp.json() 146 | 147 | 148 | def download_layer_blob(docker_image, auth, layer, layerdir): 149 | """ 150 | 下载 manifest 的 layer 文件 151 | """ 152 | layer_filename = os.path.join(layerdir, 'layer_gzip.tar') 153 | blob_digest = layer['digest'] 154 | 155 | print(blob_digest[7:19] + ': Downloading...') 156 | 157 | auth_head = get_auth_head( 158 | docker_image, auth, 159 | 'application/vnd.docker.distribution.manifest.v2+json' 160 | ) 161 | if (os.path.exists(layer_filename)): 162 | # 断点续传 163 | size = os.stat(layer_filename).st_size 164 | if size == layer['size']: 165 | print('%s 已存在' % (layer_filename)) 166 | return layer_filename 167 | auth_head['Range'] = 'bytes=%d-' % (size) 168 | 169 | bresp = requests.get( 170 | 'https://{}/v2/{}/blobs/{}'.format( 171 | docker_image['registry'], docker_image['repository'], blob_digest 172 | ), 173 | headers=auth_head, 174 | stream=True, 175 | verify=False 176 | ) 177 | if (bresp.status_code >= 400): 178 | print('\rERROR: Cannot download layer {} [HTTP {}]'.format(blob_digest[7:19], bresp.status_code)) 179 | print(bresp.content) 180 | raise Exception('download_layer_blob') 181 | 182 | if (bresp.status_code == 206): 183 | # 当前服务端支持断点续传 184 | open_flag = 'ab+' 185 | print('断点续传: %s' % (auth_head['Range'])) 186 | else: 187 | open_flag = 'wb' 188 | bresp.raise_for_status() 189 | unit = int(bresp.headers['Content-Length']) / 50 190 | acc = 0 191 | nb_traits = 0 192 | progress_bar(blob_digest, nb_traits) 193 | # 保存 layer 194 | with open(layer_filename, open_flag) as fp: 195 | for chunk in bresp.iter_content(chunk_size=8192): 196 | if chunk: 197 | fp.write(chunk) 198 | acc = acc + 8192 199 | if acc > unit: 200 | nb_traits = nb_traits + 1 201 | progress_bar(blob_digest, nb_traits) 202 | acc = 0 203 | 204 | sys.stdout.flush() 205 | print("\r{}: Pull complete [{}]".format(blob_digest[7:19], bresp.headers['Content-Length'])) 206 | return layer_filename 207 | 208 | # ======================== Layers end ==================================== 209 | 210 | # ======================== download start ==================================== 211 | def create_image_folder(docker_image): 212 | """ 213 | 创建临时目录,用于保存下载文件 214 | """ 215 | imgdir = 'tmp_{}_{}'.format(docker_image['img'], docker_image['tag'].replace(':', '@')) 216 | mkdir(imgdir) 217 | return imgdir 218 | 219 | 220 | def progress_bar(digest, nb_traits): 221 | """ 222 | 显示下载进度条 223 | """ 224 | sys.stdout.write('\r' + digest[7:19] + ': Downloading [') 225 | for i in range(0, nb_traits): 226 | if i == nb_traits - 1: 227 | sys.stdout.write('>') 228 | else: 229 | sys.stdout.write('=') 230 | for i in range(0, 49 - nb_traits): 231 | sys.stdout.write(' ') 232 | sys.stdout.write(']') 233 | sys.stdout.flush() 234 | 235 | 236 | def decompress_all_layers(all_layer_dirs): 237 | """ 238 | 解压所有的 layer gzip 文件 239 | """ 240 | for layerdir in all_layer_dirs: 241 | tar_file = os.path.join(layerdir, 'layer.tar') 242 | gzip_file = os.path.join(layerdir, 'layer_gzip.tar') 243 | if not os.path.exists(gzip_file): 244 | continue 245 | print('准备解压 %s' % (gzip_file)) 246 | with open(tar_file, "wb") as fp: 247 | unzLayer = gzip.open(gzip_file,'rb') 248 | shutil.copyfileobj(unzLayer, fp) 249 | unzLayer.close() 250 | # 解压之后删除 gzip 文件 251 | os.remove(gzip_file) 252 | 253 | 254 | def pull_image(docker_image, auth, manifest, blob): 255 | parentid='' 256 | imgdir = create_image_folder(docker_image) 257 | 258 | # 保存 blob 信息 259 | digest = manifest['config']['digest'] 260 | with open(os.path.join(imgdir, digest[7:]+'.json'), 'wb') as fp: 261 | json.dump(blob, fp, indent=2) 262 | 263 | content = [{ 264 | 'Config': digest[7:] + '.json', 265 | 'RepoTags': [ "%s:%s" % (docker_image['slug'], docker_image['tag']) ], 266 | 'Layers': [ ] 267 | }] 268 | 269 | i = 1 270 | fake_layerid = '' 271 | all_layers = [] 272 | for layer in manifest['layers']: 273 | blob_digest = layer['digest'] 274 | fake_layerid = hashlib.sha256((parentid+'\n'+blob_digest+'\n').encode('utf-8')).hexdigest() 275 | layerdir = os.path.join(imgdir, fake_layerid) 276 | mkdir(layerdir) 277 | all_layers.append(layerdir) 278 | 279 | # Creating VERSION file 280 | with open(os.path.join(layerdir, 'VERSION'), 'w') as fp: 281 | fp.write('1.0') 282 | 283 | download_layer_blob(docker_image, auth, layer, layerdir) 284 | content[0]['Layers'].append(os.path.join(fake_layerid, 'layer.tar')) 285 | # 在 layer tar 目录下创建一个 json 文件 ======================= 286 | with open(os.path.join(layerdir, 'json'), 'w') as fp: 287 | if i == len(manifest['layers']): 288 | # 最后一个 layer 文件 ================================= 289 | json_obj = copy.deepcopy(blob) 290 | del json_obj['history'] 291 | try: 292 | del json_obj['rootfs'] 293 | except: # Because Microsoft loves case insensitiveness 294 | del json_obj['rootfS'] 295 | else: 296 | # 不是最后一个 layer 文件 使用空的 json ================ 297 | json_obj = { 298 | 'container_config': { 299 | 'AttachStderr': False, 300 | 'AttachStdin': False, 301 | 'AttachStdout': False, 302 | 'Cmd': None, 303 | 'Domainname': '', 304 | 'Entrypoint': None, 305 | 'Env': None, 306 | 'Hostname': '', 307 | 'Image': '', 308 | 'Labels': None, 309 | 'OnBuild': None, 310 | 'OpenStdin': False, 311 | 'StdinOnce': False, 312 | 'Tty': False, 313 | 'User': '', 314 | 'Volumes': None, 315 | 'WorkingDir': '' 316 | }, 317 | 'created': '1970-01-01T00:00:00Z' 318 | } 319 | 320 | json_obj['id'] = fake_layerid 321 | if parentid: 322 | json_obj['parent'] = parentid 323 | parentid = json_obj['id'] 324 | json.dump(json_obj, fp, indent=2) 325 | i += 1 326 | 327 | # 解压 gzip 文件为 tar 文件 ======================================= 328 | decompress_all_layers(all_layers) 329 | # 创建 manifest 文件 330 | with open(os.path.join(imgdir, 'manifest.json'), 'w') as fp: 331 | json.dump(content, fp, indent=2) 332 | with open(os.path.join(imgdir, 'repositories'), 'w') as fp: 333 | json.dump({ 334 | docker_image['slug']: { docker_image['tag']: fake_layerid } 335 | }, fp, indent=2) 336 | 337 | # 创建 image tar 文件 338 | docker_tar = docker_image['repository'].replace('/', '_') + '.tar' 339 | print('create image archive...') 340 | tar = tarfile.open(docker_tar, "w") 341 | tar.add(imgdir, arcname=os.path.sep) 342 | tar.close() 343 | print('\rDocker image pulled: ' + docker_tar) 344 | # ======================== download end ==================================== 345 | 346 | 347 | def print_manifest(image_name): 348 | docker_image = parse_image(image_name) 349 | auth = get_auth_url(docker_image) 350 | 351 | print(json.dumps(fetch_manifest_list(docker_image, auth), indent=2)) 352 | manifest = fetch_manifest(docker_image, auth) 353 | print(json.dumps(manifest, indent=2)) 354 | blob = fetch_blob(docker_image, auth, manifest) 355 | print(json.dumps(blob, indent=2)) 356 | 357 | 358 | def main(image_name): 359 | docker_image = parse_image(image_name) 360 | auth = get_auth_url(docker_image) 361 | 362 | manifest = fetch_manifest(docker_image, auth) 363 | blob = fetch_blob(docker_image, auth, manifest) 364 | pull_image(docker_image, auth, manifest, blob) 365 | 366 | 367 | if __name__ == '__main__': 368 | if len(sys.argv) != 2 : 369 | print('Usage:\n\t%s [registry/][repository/]image[:tag|@digest]\n' % (sys.argv[0])) 370 | exit(1) 371 | 372 | main(sys.argv[1]) 373 | --------------------------------------------------------------------------------