├── setup.py ├── .gitignore ├── LICENSE ├── README.md └── pullbox └── __init__.py /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, find_packages 2 | 3 | setup( 4 | name="pullbox", 5 | version='0.1', 6 | description="A dead-simple Dropbox alternative using Git", 7 | keywords='dropbox,file synchronization,git', 8 | author='Prashanth Ellina', 9 | author_email="Use the github issues", 10 | url="https://github.com/prashanthellina/pullbox", 11 | license='MIT License', 12 | install_requires=[ 13 | 'filelock', 14 | 'watchdog', 15 | ], 16 | package_dir={'pullbox': 'pullbox'}, 17 | packages=find_packages('.'), 18 | include_package_data=True, 19 | 20 | entry_points = { 21 | 'console_scripts': [ 22 | 'pullbox = pullbox:main', 23 | ], 24 | }, 25 | ) 26 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | 5 | # C extensions 6 | *.so 7 | 8 | # Distribution / packaging 9 | .Python 10 | env/ 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | *.egg-info/ 23 | .installed.cfg 24 | *.egg 25 | 26 | # PyInstaller 27 | # Usually these files are written by a python script from a template 28 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
29 | *.manifest 30 | *.spec 31 | 32 | # Installer logs 33 | pip-log.txt 34 | pip-delete-this-directory.txt 35 | 36 | # Unit test / coverage reports 37 | htmlcov/ 38 | .tox/ 39 | .coverage 40 | .coverage.* 41 | .cache 42 | nosetests.xml 43 | coverage.xml 44 | *,cover 45 | 46 | # Translations 47 | *.mo 48 | *.pot 49 | 50 | # Django stuff: 51 | *.log 52 | 53 | # Sphinx documentation 54 | docs/_build/ 55 | 56 | # PyBuilder 57 | target/ 58 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2015 Prashanth Ellina 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | 23 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Pullbox 2 | 3 | `Pullbox` is a very simple implementation that can serve as an alternative 4 | for Dropbox that is based on Git. It works currently on any Linux-like OS 5 | and OSX but not on Windows. 6 | 7 | ## Why? 8 | 9 | Dropbox works well enough and works on many platforms. Although your data is 10 | on someone else's server, it is probably safer over there than with you (for 11 | most cases). I wrote `Pullbox` to overcome a specific limitation in Dropbox 12 | i.e. Symlinks. Dropbox does not "see" symlinks. Although it synchronizes the 13 | content pointed to by the symlink, it forgets the fact that it is a Symlink 14 | when you sync to another computer. 15 | 16 | I want to maintain my personal wiki and journal as plain text files. In 17 | order to organize my notes structure, I depend on symlinks (so I can put the 18 | same note under multiple directories). Dropbox does not support this 19 | use-case. 20 | 21 | ## How does it work? 22 | 23 | `Pullbox` needs SSH access to a remote Linux server that has `git` and 24 | `inotifywait` commands installed. This serves as the backup location for 25 | your local data. 26 | 27 | `Pullbox` monitors file system activity in the local directory and 28 | automatically pushes changes to the remote repo. The monitoring is done 29 | using `inotify` on Linux, `FSEvents` on OSX, `kqueue` on BSD style OSs. 30 | 31 | `Pullbox` also monitors file system activity on the remote repo and 32 | automatically pulls changes to the local repo when needed. This is achieved 33 | by using `ssh` and running `inotifywait` on the server (a lot like AJAX 34 | long-polling except we use SSH here instead of HTTP). 35 | 36 | ## Setting up 37 | 38 | ### Backup Server 39 | 40 | Instructions shown below assume Ubuntu Linux. 
You can modify them based on the 41 | actual distro you have. Let us say the domain name of the backup server is 42 | `example.com` 43 | 44 | ```bash 45 | sudo apt-get install git inotify-tools 46 | ``` 47 | 48 | ### Your local machine 49 | 50 | ```bash 51 | sudo pip install git+git://github.com/prashanthellina/pullbox 52 | ``` 53 | 54 | I am assuming that the username on the backup server is `prashanth`. We need 55 | to set up password-less SSH login to `prashanth@example.com` (instructions 56 | [here](http://www.linuxproblem.org/art_9.html)) 57 | 58 | `Pullbox` depends on password-less login, so make sure it is working before 59 | proceeding. 60 | 61 | Let us assume that your local directory that you want to sync is 62 | `/home/prashanth/notes`. Make sure that this directory is *not* present 63 | the very first time you start `Pullbox`. This allows `Pullbox` to clone 64 | the remote repo properly. You can run `Pullbox` manually by running the 65 | following command. 66 | 67 | ```bash 68 | pullbox --log-level DEBUG /home/prashanth/notes prashanth@example.com 69 | ``` 70 | 71 | That's it! Your directory will now be kept in sync with the remote 72 | server repo as long as the `pullbox` command above runs. 
"""Pullbox: keep a local directory in sync with a bare Git repo over SSH.

Local changes are detected with ``watchdog`` and pushed to the remote
repo; remote changes are detected by running ``inotifywait`` on the
server through ``ssh`` and pulled into the local repo.

Running permanently (from the project README)
----------------------------------------------
In order to have the command run all the time (after system reboot and
upon accidental killing etc), put an entry in crontab like so::

    * * * * * /usr/local/bin/pullbox --log-level DEBUG --log /tmp/pullbox.log --quiet /home/prashanth/notes prashanth@example.com &> /dev/null

The lock file (see ``--lock-file``) prevents multiple instances from
running at the same time.
"""

import os
import sys
import time
import shlex
import logging
import logging.handlers
import tempfile
import datetime
import argparse
import threading
import subprocess

try:
    # Python 3.3+; distutils was removed from the stdlib in Python 3.12
    from shutil import which as find_executable
except ImportError:  # Python 2
    from distutils.spawn import find_executable

import filelock
from watchdog.observers import Observer
from watchdog.events import FileSystemEventHandler

# prevent watchdog module from writing DEBUG logs
# as that is adding too much confusion during debugging
logging.getLogger('watchdog').setLevel(logging.WARNING)

DEFAULT_LOCK_FILE = os.path.join(tempfile.gettempdir(), 'pullbox.lock')


class PullboxException(Exception):
    """Base class for all errors raised by Pullbox."""


class PullboxCalledProcessError(PullboxException):
    """Raised when an external command exits with an unexpected code."""

    def __init__(self, cmd, retcode):
        super(PullboxCalledProcessError, self).__init__(cmd, retcode)
        self.cmd = cmd
        self.retcode = retcode

    def __str__(self):
        return 'PullboxCalledProcessError(code=%s, cmd="%s")' % \
                (self.retcode, self.cmd)

    __unicode__ = __str__
    __repr__ = __str__


LOG_FORMATTER = logging.Formatter('%(asctime)s %(levelname)s %(message)s')
LOG_DEFAULT_FNAME = 'log.pullbox'
MAX_LOG_FILE_SIZE = 10 * 1024 * 1024  # 10MB


def init_logger(fname, log_level, quiet=False):
    """Configure and return the root logger.

    A rotating file handler writing to @fname is always attached. A
    stderr handler is attached only when @quiet is false.

    @log_level is the name of a level from the logging module
    (case-insensitive), e.g. 'DEBUG'.
    """
    log = logging.getLogger('')

    stderr_hdlr = logging.StreamHandler(sys.stderr)
    rofile_hdlr = logging.handlers.RotatingFileHandler(
        fname, maxBytes=MAX_LOG_FILE_SIZE, backupCount=10)

    # Only format here. Attaching the stderr handler unconditionally at
    # this point would make the quiet flag a no-op.
    for hdlr in (stderr_hdlr, rofile_hdlr):
        hdlr.setFormatter(LOG_FORMATTER)

    log.addHandler(rofile_hdlr)
    if not quiet:
        log.addHandler(stderr_hdlr)

    log.setLevel(getattr(logging, log_level.upper()))

    return log


class LocalFSEventHandler(FileSystemEventHandler):
    """Calls @on_change for every relevant event in the watched tree."""

    def __init__(self, on_change):
        super(LocalFSEventHandler, self).__init__()
        self.on_change = on_change

    def on_any_event(self, evt):
        # Ignore git's own bookkeeping, dot files and bare "directory
        # modified" events.
        is_git_dir = '.git' in evt.src_path.split(os.path.sep)
        is_dot_file = os.path.basename(evt.src_path).startswith('.')
        is_dir_modified = evt.event_type == 'modified' and evt.is_directory

        if not (is_git_dir or is_dot_file or is_dir_modified):
            self.on_change()


class Pullbox(object):
    """Synchronises a local directory with a bare git repo on @server."""

    BINARIES_NEEDED = ['git', 'ssh']
    BINARIES_NEEDED_REMOTE = ['git', 'inotifywait']

    # Frequency at which client must poll data
    # server for data changes (if any)
    POLL_INTERVAL = 60  # seconds

    def __init__(self, server, path, log, suffix=False):
        """
        @server: ssh destination of the backup server (e.g. user@host)
        @path: local directory to keep in sync
        @log: logger used for all diagnostics
        @suffix: when true, the remote repo name gets a ".git" suffix
        """
        self.server = server
        self.path = os.path.abspath(path)
        self.log = log

        # Derive the name from the normalised path so that inputs such
        # as "." or "notes/" resolve to the real directory name.
        self.remote_name = os.path.basename(self.path)
        if not self.remote_name:
            raise PullboxException(
                'cannot derive a repo name from path "%s"' % path)
        if suffix:
            self.remote_name += ".git"

        # Setup monitoring of local repo changes
        self.fs_observer = Observer()
        self.fs_observer.schedule(LocalFSEventHandler(self.on_fs_change),
                                  self.path, recursive=True)

        self.fs_changed = True

        # time at which changes (if any) need to be downloaded
        # from remote repo
        self.next_pull_at = 0

    def on_fs_change(self):
        self.fs_changed = True

    def invoke_process(self, cmd, ignore_code=0, cwd=None):
        """Run @cmd (a shell-like string) with its output discarded.

        @ignore_code: an exit code, or list/tuple of exit codes, that
            must not be treated as failure (0 is never a failure).
        @cwd: directory to run the command in. Passed to the child
            process so the working directory of this multi-threaded
            process itself is never changed.

        Returns the exit code. Raises KeyboardInterrupt when the child
        exits with 130 and PullboxCalledProcessError on any other
        unexpected exit code.
        """
        self.log.debug('invoke_process(%s)', cmd)

        # Close the devnull handle on every call so the long-running
        # loops do not leak file descriptors.
        with open(os.devnull, 'w') as devnull:
            r = subprocess.call(shlex.split(cmd), stdout=devnull,
                                stderr=devnull, cwd=cwd)

        if r == 130:
            raise KeyboardInterrupt

        if not isinstance(ignore_code, (list, tuple)):
            ignore_code = [ignore_code]

        if r != 0 and r not in ignore_code:
            raise PullboxCalledProcessError(cmd, r)

        return r

    def check_binaries(self):
        """Raise PullboxException if a required local binary is missing."""
        self.log.debug('Checking presence of local binaries "%s"',
                       ', '.join(self.BINARIES_NEEDED))

        for binf in self.BINARIES_NEEDED:
            if not find_executable(binf):
                raise PullboxException('"%s" binary required' % binf)

    def check_remote_binaries(self):
        """Raise PullboxException if a required remote binary is missing
        or the server cannot be reached."""
        self.log.debug('Checking presence of remote binaries "%s"',
                       ', '.join(self.BINARIES_NEEDED_REMOTE))

        for binf in self.BINARIES_NEEDED_REMOTE:
            cmd = 'ssh %s which %s' % (self.server, binf)
            try:
                self.invoke_process(cmd)
            except PullboxCalledProcessError:
                raise PullboxException(
                    '"%s" remote binary required (or) '
                    'could not connect to server' % binf)

    def ensure_remote_repo(self):
        # git init creates a new repo if none exists else
        # "reinitializes" which is like a no-op for our purposes
        cmd = 'ssh %s git init --bare %s' % (self.server,
                                             self.remote_name)
        self.invoke_process(cmd)

    def keeprunning(self, fn, wait=0, error_wait=1):
        '''
        Keep @fn running on success or failure in an infinite loop
            - On failure, log exception, wait for @error_wait seconds
            - On success, wait for @wait seconds
        '''

        while True:
            try:
                fn()
            except (SystemExit, KeyboardInterrupt):
                raise
            except Exception:
                # __name__ exists on both Python 2 and 3 (func_name is
                # Python 2 only); fall back to repr for other callables.
                name = getattr(fn, '__name__', repr(fn))
                self.log.exception('During run of "%s" func' % name)
                time.sleep(error_wait)
                continue

            time.sleep(wait)

    def track_remote_changes(self):
        """Block until the remote repo changes, then request a pull."""
        cmd = 'ssh %s inotifywait -rqq -e modify -e move -e create -e delete %s' % \
                (self.server, self.remote_name)
        self.invoke_process(cmd)
        self.next_pull_at = time.time()

    def init_local_repo(self):
        """Clone the remote repo into the parent directory of self.path
        and make sure it has at least one commit on master."""
        bpath = os.path.dirname(self.path)

        if not os.path.exists(bpath):
            os.makedirs(bpath)

        self.invoke_process('git clone %s:%s' % (self.server, self.remote_name),
                            cwd=bpath)

        # add a dummy file to avoid trouble
        with open(os.path.join(self.path, 'README.md'), 'a'):
            pass
        self.invoke_process('git add README.md', cwd=self.path)
        # Exit code 1 means "nothing to commit", which is what happens
        # when cloning a repo that already contains README.md.
        self.invoke_process('git commit -a -m "initial"', ignore_code=1,
                            cwd=self.path)
        self.invoke_process('git push origin master', cwd=self.path)

    def pull_changes(self):
        """Pull from the remote repo if a pull is due."""
        previous = self.next_pull_at
        if previous > time.time():
            return

        # Schedule the next poll *before* pulling so that a remote change
        # reported by track_remote_changes during the pull is not
        # overwritten afterwards.
        self.next_pull_at = time.time() + self.POLL_INTERVAL
        try:
            if not os.path.exists(self.path):
                self.init_local_repo()

            self.invoke_process('git pull', cwd=self.path)
        except Exception:
            # Retry as soon as the caller allows it
            self.next_pull_at = previous
            raise

    def push_changes(self):
        """Commit and push local changes if any were observed."""
        if not self.fs_changed:
            return

        # Clear the flag *before* syncing so that edits made while git
        # is running set it again and get picked up on the next round.
        self.fs_changed = False
        try:
            self.invoke_process('git add .', cwd=self.path)

            dt = datetime.datetime.utcnow().strftime('%Y%m%dT%H%M%S')
            msg = 'auto commit at %s' % dt
            # exit code 1: nothing to commit
            self.invoke_process('git commit -a -m "%s"' % msg, ignore_code=1,
                                cwd=self.path)

            self.invoke_process('git push origin master', ignore_code=-2,
                                cwd=self.path)
        except Exception:
            # Nothing was (fully) synced; try again on the next round
            self.fs_changed = True
            raise

    def run_thread(self, target):
        """Start @target in a daemon thread and return the thread."""
        t = threading.Thread(target=target)
        t.daemon = True
        t.start()
        return t

    def start(self):
        """Validate the environment, do an initial pull and then run the
        sync threads until the process is interrupted."""
        # ensure required binaries are available in PATH
        self.check_binaries()
        self.check_remote_binaries()

        # ensure remote git repo is present (if not init one)
        self.ensure_remote_repo()

        # ensure local repo has latest data from server
        self.pull_changes()

        # start listening for changes in local repo
        self.fs_observer.start()

        # start threads
        K = self.keeprunning
        R = self.run_thread

        self.thread_track_remote_changes = R(lambda: K(self.track_remote_changes))
        self.thread_pull_changes = R(lambda: K(self.pull_changes, wait=0.1))
        self.thread_push_changes = R(lambda: K(self.push_changes, wait=0.1))

        # wait for threads to complete; join with a timeout so that the
        # main thread stays responsive to KeyboardInterrupt
        for t in (self.thread_track_remote_changes,
                  self.thread_pull_changes,
                  self.thread_push_changes):
            while t.is_alive():
                t.join(1.0)


def main():
    """Command line entry point (console script ``pullbox``)."""
    parser = argparse.ArgumentParser(description='Pullbox')

    parser.add_argument('path', help='Path to data directory')
    parser.add_argument('server', help='IP/Domain name of backup server')
    parser.add_argument('--standard-suffix', action='store_true',
        help='Makes Pullbox use the standard .git suffix for bare git repos (server side only)')
    parser.add_argument('--log', default=LOG_DEFAULT_FNAME,
        help='Name of log file')
    parser.add_argument('--log-level', default='WARNING',
        help='Logging level as picked from the logging module')
    parser.add_argument('--quiet', action='store_true')

    parser.add_argument('--lock-file', default=DEFAULT_LOCK_FILE,
        help='Lock file to prevent multiple instances from running')

    args = parser.parse_args()
    lock = filelock.FileLock(args.lock_file)

    try:
        with lock.acquire(timeout=0):
            log = init_logger(args.log, args.log_level, quiet=args.quiet)
            p = Pullbox(args.server, args.path, log, args.standard_suffix)
            p.start()
    except (SystemExit, KeyboardInterrupt):
        sys.exit(1)
    except Exception as e:
        log = logging.getLogger('')
        log.exception('exiting process because of exception')
        sys.stderr.write('%s\n' % e)
        sys.exit(1)

    sys.exit(0)


if __name__ == '__main__':
    main()