├── .gitignore ├── Dockerfile ├── README.md └── git-sync.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | 27 | # PyInstaller 28 | # Usually these files are written by a python script from a template 29 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 30 | *.manifest 31 | *.spec 32 | 33 | # Installer logs 34 | pip-log.txt 35 | pip-delete-this-directory.txt 36 | 37 | # Unit test / coverage reports 38 | htmlcov/ 39 | .tox/ 40 | .coverage 41 | .coverage.* 42 | .cache 43 | nosetests.xml 44 | coverage.xml 45 | *,cover 46 | .hypothesis/ 47 | 48 | # Translations 49 | *.mo 50 | *.pot 51 | 52 | # Django stuff: 53 | *.log 54 | 55 | # Sphinx documentation 56 | docs/_build/ 57 | 58 | # PyBuilder 59 | target/ 60 | 61 | #Ipython Notebook 62 | .ipynb_checkpoints 63 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3-alpine 2 | 3 | # install git 4 | RUN apk add --update git && rm -rf /var/cache/apk/* 5 | 6 | # install click 7 | RUN pip install click 8 | 9 | # copy script 10 | COPY git-sync.py /git-sync.py 11 | RUN chmod +x /git-sync.py 12 | 13 | # run 14 | ENV GIT_SYNC_DEST /git/ 15 | ENTRYPOINT ["./git-sync.py"] -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # git-sync 2 | 3 | `git-sync` is a command that periodically syncs a remote git repository to a 4 | local directory. 5 | 6 | This Python implementation is inspired by the Kubernetes module found here: https://github.com/kubernetes/contrib/tree/master/git-sync 7 | 8 | ## Usage 9 | 10 | #### Python 11 | Install/setup 12 | ```bash 13 | pip install click 14 | git clone https://github.com/jlowin/git-sync.git 15 | cd git-sync && chmod +x git-sync.py 16 | ``` 17 | To see available arguments: 18 | ```bash 19 | ./git-sync.py --help 20 | ``` 21 | Pass arguments at the command line: 22 | ```bash 23 | ./git-sync.py repo.git --dest /dest/path --branch branch --wait 30 24 | ``` 25 | or with environment variables: 26 | ```bash 27 | GIT_SYNC_REPO=repo.git GIT_SYNC_DEST=/dest/path ./git-sync.py 28 | ``` 29 | 30 | #### Docker 31 | By default, the docker container syncs to an internal directory `/git`. 32 | ```bash 33 | docker run -v /vol jlowin/git-sync repo.git --dest /vol --wait 100 34 | ``` 35 | (This is a spectacularly useless example; you probably want to connect another container to the synced volume.) 36 | 37 | #### Kubernetes 38 | `git-sync` was originally designed as a side-car module that keeps a (shared) container volume in sync with a remote git repository. 39 | 40 | For example, in a replication controller definition: 41 | ```yaml 42 | # this volume holds the synced repo 43 | volumes: 44 | - name: git-sync-volume 45 | emptyDir: {} 46 | 47 | # this container syncs the repo every 1000 seconds 48 | containers: 49 | - name: git-sync 50 | image: jlowin/git-sync 51 | volumeMounts: 52 | - name: git-sync-volume 53 | mountPath: /git 54 | env: 55 | - name: GIT_SYNC_REPO 56 | value: 57 | - name: GIT_SYNC_DEST 58 | value: /git 59 | - name: GIT_SYNC_WAIT 60 | value: "1000" 61 | 62 | # this container can access the synced data in /synced 63 | - name: my-container 64 | volumeMounts: 65 | - name: git-sync-volume 66 | mountPath: /synced 67 | ``` 68 | -------------------------------------------------------------------------------- /git-sync.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from __future__ import print_function 4 | import click 5 | import datetime 6 | import os 7 | import shlex 8 | import subprocess 9 | import sys 10 | import time 11 | # try to be py2/3 compatible 12 | try: 13 | from urllib.parse import urlparse 14 | except ImportError: 15 | from urlparse import urlparse 16 | 17 | def sh(*args, **kwargs): 18 | """ Get subprocess output""" 19 | return subprocess.check_output(*args, **kwargs).decode().strip() 20 | 21 | def get_repo_at(dest): 22 | if not os.path.exists(os.path.join(dest, '.git')): 23 | raise ValueError('No repo found at {dest}'.format(**locals)) 24 | 25 | current_remote = sh( 26 | shlex.split('git config --get remote.origin.url'), 27 | cwd=dest) 28 | 29 | current_branch = sh( 30 | shlex.split('git rev-parse --abbrev-ref HEAD'), 31 | cwd=dest) 32 | 33 | return current_remote.lower(), current_branch.lower() 34 | 35 | def setup_repo(repo, dest, branch): 36 | """ 37 | Clones `branch` of remote `repo` to `dest`, if it doesn't exist already. 38 | Raises an error if a different repo or branch is found. 39 | """ 40 | dest = os.path.expanduser(dest) 41 | 42 | repo_name = urlparse(repo).path 43 | 44 | # if no git repo exists at dest, clone the requested repo 45 | if not os.path.exists(os.path.join(dest, '.git')): 46 | output = sh( 47 | ['git', 'clone', '--no-checkout', '-b', branch, repo, dest]) 48 | click.echo('Cloned ...{repo_name}'.format(**locals())) 49 | 50 | else: 51 | # if there is a repo, make sure it's the right one 52 | current_remote, current_branch = get_repo_at(dest) 53 | repo = repo.lower() 54 | if not repo.endswith('.git'): 55 | repo += '.git' 56 | if not current_remote.endswith('.git'): 57 | current_remote += '.git' 58 | parsed_remote = urlparse(current_remote) 59 | parsed_repo = urlparse(repo) 60 | 61 | if ( parsed_repo.netloc != parsed_remote.netloc 62 | or parsed_repo.path != parsed_remote.path): 63 | raise ValueError( 64 | 'Requested repo `...{repo_name}` but destination already ' 65 | 'has a remote repo cloned: {current_remote}'.format(**locals())) 66 | 67 | # and check that the branches match as well 68 | if branch.lower() != current_branch: 69 | raise ValueError( 70 | 'Requested branch `{branch}` but destination is ' 71 | 'already on branch `{current_branch}`'.format(**locals())) 72 | 73 | # and check that we aren't going to overwrite any changes! 74 | # modified_status: uncommited modifications 75 | # ahead_status: commited but not pushed 76 | modified_status = sh(shlex.split('git status -s'), cwd=dest) 77 | ahead_status = sh(shlex.split('git status -sb'), cwd=dest)[3:] 78 | if modified_status: 79 | raise ValueError( 80 | 'There are uncommitted changes at {dest} that syncing ' 81 | 'would overwrite'.format(**locals())) 82 | if '[ahead ' in ahead_status: 83 | raise ValueError( 84 | 'This branch is ahead of the requested repo and syncing would ' 85 | 'overwrite the changes: {ahead_status}'.format(**locals())) 86 | 87 | 88 | def sync_repo(repo, dest, branch, rev): 89 | """ 90 | Syncs `branch` of remote `repo` (at `rev`) to `dest`. 91 | Assumes `dest` has already been cloned. 92 | """ 93 | # fetch branch 94 | output = sh(['git', 'fetch', 'origin', branch], cwd=dest) 95 | click.echo('Fetched {branch}: {output}'.format(**locals())) 96 | 97 | # reset working copy 98 | if not rev: 99 | output = sh(['git', 'reset', '--hard', 'origin/' + branch], cwd=dest) 100 | else: 101 | output = sh(['git', 'reset', '--hard', rev], cwd=dest) 102 | 103 | # clean untracked files 104 | sh(['git', 'clean', '-dfq'], cwd=dest) 105 | 106 | click.echo('Reset to {rev}: {output}'.format(**locals())) 107 | 108 | repo_name = urlparse(repo).path 109 | click.echo( 110 | 'Finished syncing {repo_name}:{branch} at {t:%Y-%m-%d %H:%M:%S}'.format( 111 | **locals(), t=datetime.datetime.now())) 112 | 113 | @click.command() 114 | @click.option('--dest', '-d', envvar='GIT_SYNC_DEST', default=os.getcwd(), help='The destination path. Defaults to the current working directory; can also be set with envvar GIT_SYNC_DEST.') 115 | @click.option('--repo', '-r', envvar='GIT_SYNC_REPO', default='', help='The url of the remote repo to sync. Defaults to inferring from `dest`; can also be set with envvar GIT_SYNC_REPO.') 116 | @click.option('--branch', '-b', envvar='GIT_SYNC_BRANCH', default='', help='The branch to sync. Defaults to inferring from `repo` (if already cloned), otherwise defaults to master; can also be set with envvar GIT_SYNC_BRANCH.') 117 | @click.option('--wait', '-w', envvar='GIT_SYNC_WAIT', default=60, help='The number of seconds to pause after each sync. Defaults to 60; can also be set with envvar GIT_SYNC_WAIT.') 118 | @click.option('--run-once', '-1', envvar='GIT_SYNC_RUN_ONCE', is_flag=True, help="Run only once (don't loop). Defaults to off; can also be set with envvar GIT_SYNC_RUN_ONCE=true.") 119 | @click.option('--rev', envvar='GIT_SYNC_REV', default=None, help='The revision to sync. Defaults to HEAD; can also be set with envvar GIT_SYNC_REV.') 120 | @click.option('--debug', envvar='GIT_SYNC_DEBUG', is_flag=True, help='Print tracebacks on error.') 121 | def git_sync(repo, dest, branch, rev, wait, run_once, debug): 122 | """ 123 | Periodically syncs a remote git repository to a local directory. The sync 124 | is one-way; any local changes will be lost. 125 | """ 126 | 127 | if not debug: 128 | sys.excepthook = ( 129 | lambda etype, e, tb : print("{}: {}".format(etype.__name__, e))) 130 | 131 | # infer repo/branch 132 | if not repo and not branch: 133 | repo, branch = get_repo_at(dest) 134 | elif not repo: 135 | repo, _ = get_repo_at(dest) 136 | elif not branch: 137 | branch = 'master' 138 | 139 | setup_repo(repo, dest, branch) 140 | while True: 141 | sync_repo(repo, dest, branch, rev) 142 | if run_once: 143 | break 144 | click.echo('Waiting {wait} seconds...'.format(**locals())) 145 | time.sleep(wait) 146 | 147 | if __name__ == '__main__': 148 | git_sync() 149 | --------------------------------------------------------------------------------