├── spaghettifs ├── __init__.py ├── benchmark │ ├── __init__.py │ ├── simple_read.py │ ├── simple_files.py │ └── bench.py ├── tests │ ├── __init__.py │ ├── test_mount_commits.py │ ├── support.py │ ├── test_treetree.py │ ├── test_filesystem.py │ ├── test_easygit.py │ └── test_storage.py ├── cmd.py ├── treetree.py ├── filesystem.py ├── easygit.py └── storage.py ├── .gitignore ├── setup.py ├── README.rst └── LICENSE /spaghettifs/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /spaghettifs/benchmark/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /spaghettifs/tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | SpaghettiFS.egg-info 3 | dist 4 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, find_packages 2 | 3 | setup( 4 | name="SpaghettiFS", 5 | version="0.1", 6 | description="Git-backed FUSE filesystem", 7 | keywords="git filesystem", 8 | url="http://github.com/alex-morega/SpaghettiFS", 9 | license="BSD License", 10 | author="Alex Morega", 11 | author_email="public@grep.ro", 12 | packages=find_packages(), 13 | setup_requires=['nose>=0.11'], 14 | install_requires=['dulwich>=0.3.3', 'fusepy>=1.0.r33'], 15 | test_suite="nose.collector", 16 | entry_points={ 17 | 'console_scripts': [ 18 | 'spaghettifs = spaghettifs.cmd:main', 19 | ], 20 | }, 21 | ) 22 | 
-------------------------------------------------------------------------------- /spaghettifs/benchmark/simple_read.py: -------------------------------------------------------------------------------- 1 | from os import path 2 | from pprint import pprint 3 | 4 | from probity import checksum 5 | checksum.BLOCK_SIZE = 65536 6 | from spaghettifs.benchmark import bench 7 | 8 | class FileReader(object): 9 | """ read the files in the given directory """ 10 | 11 | def __init__(self, subdir=''): 12 | self.subdir = subdir 13 | 14 | def __call__(self, mount_path): 15 | folder_path = path.join(mount_path, self.subdir) 16 | print checksum.folder_sha1(folder_path, lambda d: None) 17 | 18 | def main(repo_path, mount_path, subdir): 19 | bench.log_to_stderr() 20 | file_reader = FileReader(subdir) 21 | for c in range(1): 22 | stats = bench.measure(file_reader, repo_path, mount_path) 23 | pprint(stats) 24 | 25 | if __name__ == '__main__': 26 | import sys 27 | main(*sys.argv[1:]) 28 | -------------------------------------------------------------------------------- /spaghettifs/benchmark/simple_files.py: -------------------------------------------------------------------------------- 1 | from os import path 2 | 3 | from spaghettifs.benchmark import bench 4 | 5 | def tiny_files_1k(mount_path): 6 | """ 1000 tiny files """ 7 | for c in xrange(1000): 8 | with open(path.join(mount_path, 'file-%d' % c), 'w') as f: 9 | f.write('asdf') 10 | 11 | def large_files_10(mount_path): 12 | """ 10 files of 10MB each """ 13 | for c in xrange(10): 14 | with open(path.join(mount_path, 'file-%d' % c), 'w') as f: 15 | for d in xrange(1024): 16 | f.write('my 10-byte' * 1024) 17 | 18 | def main(): 19 | bench.log_to_stderr() 20 | for f in (tiny_files_1k, large_files_10): 21 | for c in range(3): 22 | with bench.TempFS() as temp_fs: 23 | print '%s: %r' % (f.func_name, temp_fs.measure(f)) 24 | 25 | if __name__ == '__main__': 26 | main() 27 | 
-------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | SpaghettiFS 2 | =========== 3 | 4 | SpaghettiFS is a FUSE filesystem that stores data in a Git repository. 5 | 6 | Getting started 7 | --------------- 8 | SpaghettiFS code is experimental, not suitable for anything important. It will 9 | steal your files, crash your computer and burn down your house. Handle with 10 | care. That being said, here's a quick guide: 11 | 12 | - clone the source code: ``git clone 13 | git://github.com/alex-morega/SpaghettiFS.git`` 14 | - (optionally) set up a virtualenv 15 | - run ``python setup.py develop`` 16 | - run unit tests: ``python setup.py test -q`` or ``python 17 | spaghettifs/tests/all.py`` 18 | - create a blank filesystem: ``spaghettifs mkfs path/to/repo.sfs`` 19 | - mount the filesystem: ``spaghettifs mount path/to/repo.sfs path/to/mount`` 20 | 21 | Missing features 22 | ---------------- 23 | - file metadata: owner, permissions, create/modify/access times 24 | - symlinks, renaming of folders 25 | - fsck 26 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2009 Alex Morega 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy 4 | of this software and associated documentation files (the "Software"), to deal 5 | in the Software without restriction, including without limitation the rights 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | copies of the Software, and to permit persons to whom the Software is 8 | furnished to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in 11 | all copies or substantial portions of the Software. 
12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 | THE SOFTWARE. 20 | -------------------------------------------------------------------------------- /spaghettifs/cmd.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import logging 3 | from optparse import OptionParser 4 | 5 | from spaghettifs import storage 6 | from spaghettifs import filesystem 7 | 8 | usage = """\ 9 | usage: %prog mkfs REPO_PATH 10 | %prog mount REPO_PATH MOUNT_PATH [options] 11 | %prog fsck REPO_PATH 12 | %prog upgrade REPO_PATH 13 | """.strip() 14 | 15 | parser = OptionParser(usage=usage) 16 | parser.add_option("-v", "--verbose", 17 | action="store_const", const=logging.DEBUG, dest="loglevel") 18 | parser.add_option("-q", "--quiet", 19 | action="store_const", const=logging.ERROR, dest="loglevel") 20 | parser.set_defaults(loglevel=logging.INFO) 21 | 22 | def main(): 23 | options, args = parser.parse_args() 24 | 25 | if not args: 26 | return parser.print_usage() 27 | 28 | elif args[0] == 'mkfs': 29 | if len(args) != 2: 30 | return parser.print_usage() 31 | storage.GitStorage.create(args[1]) 32 | 33 | elif args[0] == 'mount': 34 | if len(args) != 3: 35 | return parser.print_usage() 36 | repo_path, mount_path = args[1:] 37 | print "mounting %r at %r" % (repo_path, mount_path) 38 | filesystem.mount(repo_path, mount_path, loglevel=options.loglevel) 39 | 40 | elif args[0] == 'fsck': 41 | if len(args) != 2: 42 | return parser.print_usage() 43 | storage.fsck(args[1], sys.stdout) 44 | 45 | elif args[0] == 'upgrade': 46 | 
if len(args) != 2: 47 | return parser.print_usage() 48 | handler = logging.StreamHandler() 49 | handler.setLevel(options.loglevel) 50 | logging.getLogger('spaghettifs.storage.upgrade').addHandler(handler) 51 | for run_update in storage.all_updates: 52 | run_update(args[1]) 53 | 54 | else: 55 | return parser.print_usage() 56 | 57 | if __name__ == '__main__': 58 | main() 59 | -------------------------------------------------------------------------------- /spaghettifs/tests/test_mount_commits.py: -------------------------------------------------------------------------------- 1 | from os import path 2 | 3 | import dulwich 4 | 5 | from test_filesystem import SpaghettiMountTestCase 6 | from spaghettifs import filesystem 7 | 8 | class MountCommits(SpaghettiMountTestCase): 9 | def tearDown(self): 10 | if self.mounted: 11 | self.umount() 12 | super(MountCommits, self).tearDown() 13 | 14 | def git_repo(self): 15 | return dulwich.repo.Repo(self.repo_path) 16 | 17 | def test_temporary_commit(self): 18 | self.mount() 19 | 20 | git = self.git_repo() 21 | try: 22 | git.refs['refs/heads/mounted'] 23 | except KeyError: 24 | self.fail('branch "mounted" does not exist') 25 | 26 | initial_heads = { 27 | "master": git.refs['refs/heads/master'], 28 | "mounted": git.refs['refs/heads/mounted'], 29 | } 30 | self.assertNotEqual(initial_heads['master'], initial_heads['mounted']) 31 | 32 | with open(path.join(self.mount_point, 'bigfile'), 'wb') as f: 33 | filesize = filesystem.WRITE_BUFFER_SIZE * 3 34 | for c in xrange(filesize / 4096 + 1): 35 | f.write('asdf' * 1024) 36 | 37 | git = self.git_repo() 38 | current_heads = { 39 | "master": git.refs['refs/heads/master'], 40 | "mounted": git.refs['refs/heads/mounted'], 41 | } 42 | self.assertEqual(initial_heads['master'], current_heads['master']) 43 | self.assertNotEqual(current_heads['master'], current_heads['mounted']) 44 | self.assertNotEqual(initial_heads['mounted'], current_heads['mounted']) 45 | 46 | self.umount() 47 | 48 | git = 
self.git_repo() 49 | final_heads = { 50 | "master": git.refs['refs/heads/master'], 51 | } 52 | self.assertRaises(KeyError, lambda: git.refs['refs/heads/mounted']) 53 | self.assertNotEqual(final_heads['master'], current_heads['master']) 54 | 55 | def test_no_modifications(self): 56 | self.mount() 57 | git = self.git_repo() 58 | initial_master = git.refs['refs/heads/master'] 59 | 60 | self.umount() 61 | git = self.git_repo() 62 | final_master = git.refs['refs/heads/master'] 63 | 64 | self.assertEqual(final_master, initial_master) 65 | -------------------------------------------------------------------------------- /spaghettifs/tests/support.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import tempfile 3 | import shutil 4 | import os 5 | import logging 6 | import random 7 | import struct 8 | from cStringIO import StringIO 9 | 10 | from spaghettifs import storage 11 | from spaghettifs import easygit 12 | from spaghettifs import treetree 13 | 14 | stderr_handler = logging.StreamHandler() 15 | stderr_handler.setLevel(logging.ERROR) 16 | logging.getLogger('spaghettifs').addHandler(stderr_handler) 17 | 18 | class SpaghettiTestCase(unittest.TestCase): 19 | def setUp(self): 20 | self.tmpdir = tempfile.mkdtemp() 21 | self.repo_path = os.path.join(self.tmpdir, 'repo.sfs') 22 | 23 | os.mkdir(self.repo_path) 24 | eg = easygit.EasyGit.new_repo(self.repo_path, bare=True) 25 | with eg.root as root: 26 | with root.new_tree('inodes') as inodes: 27 | inodes_tt = treetree.TreeTree(inodes, prefix='it') 28 | def make_file_inode(inode_name, contents): 29 | with inodes_tt.new_tree(inode_name[1:]) as i1: 30 | b0 = i1.new_tree('bt1').new_blob('0') 31 | b0.data = contents 32 | meta = i1.new_blob('meta') 33 | meta.data = ('mode: 0100644\n' 34 | 'nlink: 1\n' 35 | 'uid: 0\n' 36 | 'gid: 0\n' 37 | 'size: %(size)d\n') % { 38 | 'size': len(contents), 39 | } 40 | 41 | make_file_inode('i1', 'text file "a"\n') 42 | make_file_inode('i2', 
'file D!\n') 43 | make_file_inode('i3', 'the E file\n') 44 | make_file_inode('i4', 'F is here\n') 45 | 46 | root.new_blob('features').data = '{}' 47 | features = storage.FeatureBlob(root['features']) 48 | features['next_inode_number'] = 5 49 | features['inode_index_format'] = 'treetree' 50 | features['inode_format'] = 'treetree' 51 | 52 | root.new_blob('root.ls').data = 'a.txt i1\nb /\n' 53 | with root.new_tree('root.sub') as root_sub: 54 | root_sub.new_blob('b.ls').data = 'c /\nf.txt i4\n' 55 | with root_sub.new_tree('b.sub') as b_sub: 56 | b_sub.new_blob('c.ls').data = 'd.txt i2\ne.txt i3\n' 57 | 58 | eg.commit("Spaghetti User ", 59 | 'Created empty filesystem') 60 | 61 | self.repo = storage.GitStorage(self.repo_path) 62 | 63 | def tearDown(self): 64 | shutil.rmtree(self.tmpdir) 65 | 66 | def randomdata(size): 67 | f = StringIO() 68 | for c in xrange(size / 8 + 1): 69 | f.write(struct.pack('Q', random.getrandbits(64))) 70 | return f.getvalue()[:size] 71 | 72 | def setup_logger(log_level): 73 | import logging 74 | stderr_handler = logging.StreamHandler() 75 | stderr_handler.setLevel(getattr(logging, log_level)) 76 | logging.getLogger('spaghettifs').addHandler(stderr_handler) 77 | -------------------------------------------------------------------------------- /spaghettifs/treetree.py: -------------------------------------------------------------------------------- 1 | """ 2 | TreeTree is a wrapper over `easygit.EasyTree` that provides more efficient 3 | storage of lists. Keys must be strings made up of digits, and they should 4 | be as close as possible to the indices of a list. 
class TreeTree(object):
    # Wrapper over an `easygit.EasyTree` that spreads numeric-ish keys over
    # nested single-character trees: key "123" lives at <prefix>3/1/2/3, so
    # no single git tree accumulates a huge number of siblings.
    is_tree = True

    def __init__(self, container, prefix='tt'):
        # container: an EasyTree-like node; prefix: namespace for the
        # per-key-length subtrees (e.g. 'tt3' holds every 3-character key).
        self.container = container
        self.prefix = prefix

    def walk(self, name, look):
        """Drive ``look`` down the per-character path for ``name``.

        ``look(node, key, is_last, descend)`` is called once per path
        component; it decides how to obtain/create the child for ``key``
        and calls ``descend(child)`` to continue (a no-op on the last hop).
        """
        check_name(name)
        route = ['%s%d' % (self.prefix, len(name))] + list(name)
        final_key = route.pop()
        remaining = iter(route)

        def descend(node):
            assert node.is_tree
            try:
                key = next(remaining)
            except StopIteration:
                # final hop: hand over the last key and stop recursing
                return look(node, final_key, True, lambda leaf: leaf)
            return look(node, key, False, descend)

        return descend(self.container)

    def new_tree(self, name):
        """Create (or fetch, if it already exists) a subtree at ``name``."""
        def visit(node, key, is_last, descend):
            try:
                child = node[key]
            except KeyError:
                child = node.new_tree(key)
            return descend(child)

        found = self.walk(name, visit)
        if not found.is_tree:
            raise ValueError
        return found

    def new_blob(self, name):
        """Create (or fetch, if it already exists) a blob at ``name``."""
        def visit(node, key, is_last, descend):
            try:
                child = node[key]
            except KeyError:
                # intermediate hops are trees; only the leaf is a blob
                child = node.new_blob(key) if is_last else node.new_tree(key)
            return descend(child)

        found = self.walk(name, visit)
        if found.is_tree:
            raise ValueError
        return found

    def clone(self, source, name):
        """Store a copy of ``source`` (tree or blob) at ``name``."""
        def visit(node, key, is_last, descend):
            try:
                child = node[key]
            except KeyError:
                child = (node.clone(source, key) if is_last
                         else node.new_tree(key))
            return descend(child)

        found = self.walk(name, visit)
        # the stored node must match the source's kind (tree vs blob)
        if bool(source.is_tree) != bool(found.is_tree):
            raise ValueError
        return found

    def __getitem__(self, name):
        def visit(node, key, is_last, descend):
            return descend(node[key])

        return self.walk(name, visit)

    def __contains__(self, name):
        try:
            self[name]
        except KeyError:
            return False
        return True
90 | def __delitem__(self, name): 91 | def look(node, key, last, step): 92 | if last: 93 | del node[key] 94 | return 95 | 96 | nextnode = node[key] 97 | step(nextnode) 98 | if not nextnode.keys(): 99 | del node[key] 100 | 101 | return self.walk(name, look) 102 | 103 | def remove(self): 104 | return self.container.remove() 105 | 106 | def check_name(name): 107 | if not name: 108 | raise ValueError('Blank names not allowed: %r' % name) 109 | if not isinstance(name, basestring): 110 | raise ValueError('Names must be strings: %r' % name) 111 | -------------------------------------------------------------------------------- /spaghettifs/tests/test_treetree.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import tempfile 3 | import shutil 4 | 5 | import dulwich 6 | from support import setup_logger 7 | from spaghettifs.easygit import EasyGit, EasyTree, EasyBlob 8 | from spaghettifs.treetree import TreeTree 9 | 10 | class BasicTestCase(unittest.TestCase): 11 | def setUp(self): 12 | self.repo_path = tempfile.mkdtemp() 13 | self.eg = EasyGit.new_repo(self.repo_path, bare=True) 14 | self.tt = TreeTree(self.eg.root.new_tree('tt')) 15 | 16 | def tearDown(self): 17 | shutil.rmtree(self.repo_path) 18 | 19 | def commit(self): 20 | self.eg.commit(author="Spaghetti User ", 21 | message="test commit") 22 | 23 | def test_valid_ids(self): 24 | self.assertRaises(ValueError, self.tt.new_blob, '') 25 | self.assertRaises(ValueError, self.tt.new_blob, 1234) 26 | try: 27 | self.tt.new_blob('12') 28 | self.tt.new_blob('asdf') 29 | except ValueError: 30 | self.fail('Should not raise exception') 31 | 32 | def test_create_retrieve_blobs(self): 33 | for name in ['345', '7', '22', '549', '0']: 34 | self.assertTrue(name not in self.tt) 35 | self.tt.new_blob(name).data = 'asdf' 36 | self.commit() 37 | self.assertTrue(name in self.tt) 38 | self.assertEqual(self.tt[name].data, 'asdf') 39 | self.assertTrue(name not in self.eg.root['tt']) 40 | 
41 | def test_create_retrieve_trees(self): 42 | for name in ['24', '9', '873', '22']: 43 | self.assertTrue(name not in self.tt) 44 | self.tt.new_tree(name).new_blob('c').data = 'qwer' 45 | self.commit() 46 | self.assertTrue(name in self.tt) 47 | self.assertEqual(self.tt[name]['c'].data, 'qwer') 48 | self.assertTrue(name not in self.eg.root['tt']) 49 | 50 | def test_structure(self): 51 | raw_tt = self.eg.root['tt'] 52 | self.tt.new_tree('123') 53 | self.assertTrue(isinstance(raw_tt['tt3']['1']['2']['3'], EasyTree)) 54 | self.tt.new_blob('22') 55 | self.assertTrue(isinstance(raw_tt['tt2']['2']['2'], EasyBlob)) 56 | self.assertRaises(KeyError, lambda: self.tt['33']) 57 | 58 | def test_overwrite(self): 59 | self.tt.new_tree('123') 60 | self.assertRaises(ValueError, self.tt.new_blob, '123') 61 | 62 | def test_clone(self): 63 | blobby = self.eg.root.new_blob('blobby') 64 | blobby.data = 'blobby data' 65 | self.tt.clone(blobby, '1234') 66 | blobby.data = 'qwer' 67 | self.assertEqual(self.tt['1234'].data, 'blobby data') 68 | self.assertEqual(self.eg.root['tt']['tt4']['1']['2']['3']['4'].data, 69 | 'blobby data') 70 | 71 | def test_remove(self): 72 | raw_tt = self.eg.root['tt'] 73 | for name in ['345', '7', '22', '549']: 74 | self.tt.new_blob(name).data = 'asdf' 75 | 76 | self.assertTrue('345' in self.tt) 77 | self.assertTrue('tt3' in raw_tt) 78 | self.assertTrue('3' in raw_tt['tt3']) 79 | del self.tt['345'] 80 | self.assertTrue('345' not in self.tt) 81 | self.assertTrue('tt3' in raw_tt) 82 | self.assertTrue('3' not in raw_tt['tt3']) 83 | 84 | self.assertTrue('7' in self.tt) 85 | self.assertTrue('tt1' in raw_tt) 86 | del self.tt['7'] 87 | self.assertTrue('7' not in self.tt) 88 | self.assertTrue('tt1' not in raw_tt) 89 | 90 | self.assertRaises(KeyError, lambda: self.tt['345']) 91 | self.assertRaises(KeyError, lambda: self.tt['7']) 92 | self.assertEqual(self.tt['22'].data, 'asdf') 93 | self.assertEqual(self.tt['549'].data, 'asdf') 94 | 95 | if __name__ == '__main__': 96 | 
setup_logger('ERROR') 97 | unittest.main() 98 | -------------------------------------------------------------------------------- /spaghettifs/benchmark/bench.py: -------------------------------------------------------------------------------- 1 | import multiprocessing 2 | import tempfile 3 | import os 4 | from os import path 5 | import shutil 6 | import logging 7 | import time 8 | import collections 9 | 10 | from spaghettifs import storage 11 | from spaghettifs import filesystem 12 | from spaghettifs.tests import test_filesystem 13 | import Queue 14 | 15 | log = logging.getLogger('spaghettifs.bench') 16 | log.setLevel(logging.DEBUG) 17 | 18 | class LogWatcher(object): 19 | level = logging.DEBUG 20 | def __init__(self): 21 | logging.getLogger('spaghettifs').addHandler(self) 22 | self.stats = collections.defaultdict(int) 23 | 24 | def handle(self, record): 25 | self.stats['log_count'] += 1 26 | if 'loading git tree' in record.msg: 27 | self.stats['tree_loads'] += 1 28 | elif 'loading git blob' in record.msg: 29 | self.stats['blob_loads'] += 1 30 | elif 'Loaded inode' in record.msg: 31 | self.stats['inode_loads'] += 1 32 | 33 | def report(self): 34 | return self.stats 35 | 36 | def fs_mount(repo_path, mount_path, stats_queue): 37 | log_watcher = LogWatcher() 38 | time0 = time.time() 39 | clock0 = time.clock() 40 | filesystem.mount(repo_path, mount_path) 41 | stats = {'time': time.time() - time0, 42 | 'clock': time.clock() - clock0} 43 | stats.update(log_watcher.report()) 44 | stats_queue.put(stats) 45 | 46 | class TempFS(object): 47 | def __enter__(self): 48 | self.temp_path = tempfile.mkdtemp() 49 | log.debug('setting up temporary filesystem at %r', self.temp_path) 50 | 51 | self.mount_path = path.join(self.temp_path, 'mnt') 52 | os.mkdir(self.mount_path) 53 | 54 | self.repo_path = path.join(self.temp_path, 'repo.sfs') 55 | storage.GitStorage.create(self.repo_path) 56 | 57 | return self 58 | 59 | def __exit__(self, *args): 60 | log.debug('cleaning up temporary 
filesystem at %r', self.temp_path) 61 | shutil.rmtree(self.temp_path) 62 | 63 | def measure(self, do_work): 64 | return measure(do_work, self.repo_path, self.mount_path) 65 | 66 | def measure(do_work, repo_path, mount_path): 67 | stats_queue = multiprocessing.Queue() 68 | log.debug('starting mounter process; repo_path=%r, mount_path=%r', 69 | mount_path, repo_path) 70 | args = (repo_path, mount_path, stats_queue) 71 | p = multiprocessing.Process(target=fs_mount, args=args) 72 | p.start() 73 | if test_filesystem.wait_for_mount(mount_path): 74 | log.debug('mount successful') 75 | else: 76 | log.error('mount failed') 77 | return 78 | 79 | try: 80 | do_work(mount_path) 81 | except: 82 | log.debug('caught exception; doing cleanup and re-raising') 83 | raise 84 | finally: 85 | log.debug('running unmount command') 86 | test_filesystem.do_umount(mount_path) 87 | log.debug('waiting for mounter stats, timeout=2') 88 | try: 89 | stats = stats_queue.get(timeout=2) 90 | except Queue.Empty: 91 | log.error('timeout while waiting for stats') 92 | stats = None 93 | log.debug('received stats: %r', stats) 94 | log.debug('joining mounter process, timeout=2') 95 | p.join(timeout=2) 96 | if p.is_alive(): 97 | log.error('joining child process failed') 98 | else: 99 | log.debug('join successful') 100 | 101 | return stats 102 | 103 | def log_to_stderr(debug=False): 104 | handler = logging.StreamHandler() 105 | handler.setLevel(logging.DEBUG if debug else logging.INFO) 106 | log.addHandler(handler) 107 | 108 | if __name__ == '__main__': 109 | def blanktest(mount_path): 110 | print 'performing blank test at', mount_path 111 | 112 | log_to_stderr(debug=True) 113 | with TempFS() as tfs: 114 | print tfs.measure(blanktest) 115 | -------------------------------------------------------------------------------- /spaghettifs/filesystem.py: -------------------------------------------------------------------------------- 1 | import os 2 | from errno import ENOENT, EPERM 3 | from stat import S_IFDIR, 
S_IFREG 4 | from time import time 5 | import logging 6 | from datetime import datetime 7 | import threading 8 | import functools 9 | import collections 10 | 11 | from fuse import FUSE, Operations 12 | from storage import GitStorage 13 | 14 | log = logging.getLogger('spaghettifs.filesystem') 15 | log.setLevel(logging.DEBUG) 16 | 17 | WRITE_BUFFER_SIZE = 3 * 1024 * 1024 # 3MB 18 | 19 | def memoize(size): 20 | memo = collections.deque(maxlen=size) 21 | 22 | def decorator(f): 23 | @functools.wraps(f) 24 | def wrapper(*args): 25 | for key, value in memo: 26 | if key == args: 27 | break 28 | else: 29 | value = f(*args) 30 | memo.append( (args, value) ) 31 | 32 | return value 33 | 34 | wrapper.flush_memo = memo.clear 35 | return wrapper 36 | 37 | return decorator 38 | 39 | class SpaghettiFS(Operations): 40 | def __init__(self, repo): 41 | self.repo = repo 42 | self._write_count = 0 43 | # the FUSE library seems to assume we're thread-safe, so we use a 44 | # big fat lock, just in case 45 | self._lock = threading.Lock() 46 | 47 | @memoize(10) 48 | def get_obj(self, path): 49 | #assert(path.startswith('/')) 50 | obj = self.repo.get_root() 51 | for frag in path[1:].split('/'): 52 | if frag == '': 53 | continue 54 | try: 55 | obj = obj[frag] 56 | except KeyError: 57 | return None 58 | 59 | return obj 60 | 61 | def getattr(self, path, fh=None): 62 | obj = self.get_obj(path) 63 | if obj is None: 64 | raise OSError(ENOENT, '') 65 | 66 | if obj.is_dir: 67 | st = dict(st_mode=(S_IFDIR | 0755), st_nlink=2) 68 | else: 69 | st = dict(st_mode=(S_IFREG | 0444), st_size=obj.size) 70 | st['st_nlink'] = obj.inode['nlink'] 71 | 72 | # FUSE seeems to ignore our st_ino 73 | #st['st_ino'] = int(obj.inode.name[1:]) 74 | 75 | st['st_ctime'] = st['st_mtime'] = st['st_atime'] = time() 76 | return st 77 | 78 | def create(self, path, mode): 79 | parent_path, file_name = os.path.split(path) 80 | parent = self.get_obj(parent_path) 81 | parent.create_file(file_name) 82 | self.get_obj.flush_memo() 83 | 
return 0 84 | 85 | def link(self, target, source): 86 | source_obj = self.get_obj(source) 87 | target_parent_obj = self.get_obj(os.path.dirname(target)) 88 | target_parent_obj.link_file(os.path.basename(target), source_obj) 89 | self.get_obj.flush_memo() 90 | 91 | def mkdir(self, path, mode): 92 | parent_path, dir_name = os.path.split(path) 93 | parent = self.get_obj(parent_path) 94 | parent.create_directory(dir_name) 95 | self.get_obj.flush_memo() 96 | 97 | def read(self, path, size, offset, fh): 98 | obj = self.get_obj(path) 99 | if obj is None or obj.is_dir: 100 | return '' 101 | else: 102 | return obj.read_data(offset, size) 103 | 104 | def readdir(self, path, fh): 105 | obj = self.get_obj(path) 106 | return ['.', '..'] + list(obj.keys()) 107 | 108 | def rename(self, source, target): 109 | source_obj = self.get_obj(source) 110 | if source_obj.is_dir: 111 | raise OSError(EPERM, '') 112 | target_parent_obj = self.get_obj(os.path.dirname(target)) 113 | target_parent_obj.link_file(os.path.basename(target), source_obj) 114 | source_obj.unlink() 115 | self.get_obj.flush_memo() 116 | 117 | def rmdir(self, path): 118 | obj = self.get_obj(path) 119 | if obj is None or not obj.is_dir: 120 | return 121 | 122 | obj.unlink() 123 | self.get_obj.flush_memo() 124 | 125 | def truncate(self, path, length, fh=None): 126 | obj = self.get_obj(path) 127 | if obj is None or obj.is_dir: 128 | return 129 | 130 | obj.truncate(length) 131 | 132 | def unlink(self, path): 133 | obj = self.get_obj(path) 134 | if obj is None or obj.is_dir: 135 | return 136 | 137 | obj.unlink() 138 | self.get_obj.flush_memo() 139 | 140 | def write(self, path, data, offset, fh): 141 | obj = self.get_obj(path) 142 | if obj is None or obj.is_dir: 143 | return 0 144 | 145 | obj.write_data(data, offset) 146 | 147 | if not self.repo.autocommit: 148 | self._write_count += len(data) 149 | if self._write_count > WRITE_BUFFER_SIZE: 150 | self.repo.commit(amend=True, branch="mounted") 151 | self._write_count = 0 152 | 
153 | return len(data) 154 | 155 | # access = None 156 | flush = None 157 | getxattr = None 158 | listxattr = None 159 | open = None 160 | opendir = None 161 | release = None 162 | releasedir = None 163 | statfs = None 164 | 165 | def __call__(self, op, path, *args): 166 | log.debug('FUSE api call: %r %r %r', 167 | op, path, tuple(LogWrap(arg) for arg in args)) 168 | ret = '[Unknown Error]' 169 | self._lock.acquire() 170 | try: 171 | ret = super(SpaghettiFS, self).__call__(op, path, *args) 172 | return ret 173 | except OSError, e: 174 | ret = str(e) 175 | raise 176 | finally: 177 | self._lock.release() 178 | log.debug('FUSE api return: %r %r', op, LogWrap(ret)) 179 | 180 | class LogWrap(object): 181 | def __init__(self, value): 182 | self.value = value 183 | 184 | def __repr__(self): 185 | if isinstance(self.value, basestring) and len(self.value) > 20: 186 | r = repr(self.value[:12]) 187 | return '%s[...(len=%d)]%s' % (r[:11], len(self.value), r[-1]) 188 | else: 189 | return repr(self.value) 190 | 191 | def __str__(self): 192 | return repr(self) 193 | 194 | datefmt = lambda dt: dt.strftime('%Y-%m-%d %H:%M:%S') 195 | 196 | class _open_fs(object): 197 | def __init__(self, repo_path, cls): 198 | self.repo_path = repo_path 199 | self.cls = cls 200 | 201 | def __enter__(self): 202 | self.time_mount = datetime.now() 203 | 204 | self.repo = GitStorage(self.repo_path, autocommit=False) 205 | self.git = self.repo.eg.git 206 | 207 | master_id = self.git.refs['refs/heads/master'] 208 | self.initial_tree_id = self.git.commit(master_id).tree 209 | 210 | msg = ("[temporary commit; currently mounted, since %s]" % 211 | datefmt(self.time_mount)) 212 | self.repo.commit(msg, branch="mounted", head_id=master_id) 213 | 214 | return self.cls(self.repo) 215 | 216 | def __exit__(self, e0, e1, e2): 217 | self.time_unmount = datetime.now() 218 | 219 | msg = ("Mounted operations:\n mounted at %s\n unmounted at %s\n" % 220 | (datefmt(self.time_mount), datefmt(self.time_unmount))) 221 | 
self.repo.commit(msg, amend=True, branch="mounted") 222 | 223 | mounted_id = self.git.refs['refs/heads/mounted'] 224 | mounted_tree_id = self.git.commit(mounted_id).tree 225 | 226 | if mounted_tree_id != self.initial_tree_id: 227 | self.git.refs['refs/heads/master'] = mounted_id 228 | 229 | del self.git.refs['refs/heads/mounted'] 230 | 231 | def mount(repo_path, mount_path, cls=SpaghettiFS, loglevel=logging.ERROR): 232 | if loglevel is not None: 233 | stderr_handler = logging.StreamHandler() 234 | stderr_handler.setLevel(loglevel) 235 | logging.getLogger('spaghettifs').addHandler(stderr_handler) 236 | 237 | with _open_fs(repo_path, cls) as fs: 238 | FUSE(fs, mount_path, foreground=True) 239 | -------------------------------------------------------------------------------- /spaghettifs/tests/test_filesystem.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import os 3 | from os import path 4 | import sys 5 | import subprocess 6 | import time 7 | from errno import EPERM 8 | 9 | from support import SpaghettiTestCase, randomdata 10 | 11 | def wait_for_mount(mount_path): 12 | for c in xrange(20): 13 | if path.ismount(mount_path): 14 | return True 15 | time.sleep(.1) 16 | else: 17 | return False 18 | 19 | def do_umount(mount_path): 20 | if sys.platform == 'darwin': 21 | cmd = ['umount', mount_path] 22 | elif sys.platform == 'linux2': 23 | cmd = ['fusermount', '-u', '-z', mount_path] 24 | else: 25 | raise ValueError("Don't know how to unmount a fuse filesystem") 26 | 27 | p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) 28 | return p.communicate()[0] 29 | 30 | class SpaghettiMountTestCase(SpaghettiTestCase): 31 | script_tmpl = "from spaghettifs.filesystem import mount; mount(%s, %s)" 32 | 33 | mounted = False 34 | 35 | def mount(self): 36 | self.mount_point = path.join(self.tmpdir, 'mnt') 37 | os.mkdir(self.mount_point) 38 | script = self.script_tmpl % (repr(self.repo_path), 39 | 
repr(self.mount_point)) 40 | self.fsmount = subprocess.Popen([sys.executable, '-c', script], 41 | stdout=subprocess.PIPE, 42 | stderr=subprocess.STDOUT) 43 | # wait for mount operation to complete 44 | if not wait_for_mount(self.mount_point): 45 | if self.fsmount.poll(): 46 | self._output = self.fsmount.communicate()[0] 47 | raise AssertionError('Filesystem did not mount after 2 seconds') 48 | 49 | self.mounted = True 50 | 51 | def umount(self): 52 | msg = do_umount(path.realpath(self.mount_point)) 53 | self._output = self.fsmount.communicate()[0] 54 | 55 | self.mounted = False 56 | 57 | class BasicFilesystemOps(SpaghettiMountTestCase): 58 | def setUp(self): 59 | super(BasicFilesystemOps, self).setUp() 60 | self.mount() 61 | 62 | def tearDown(self): 63 | self.umount() 64 | super(BasicFilesystemOps, self).tearDown() 65 | 66 | def test_listing(self): 67 | ls = os.listdir(self.mount_point) 68 | self.assertEqual(set(ls), set(['a.txt', 'b'])) 69 | 70 | def test_read_file(self): 71 | data = open(path.join(self.mount_point, 'a.txt')).read() 72 | self.assertEqual(data, 'text file "a"\n') 73 | 74 | def test_write_file(self): 75 | new_file_path = path.join(self.mount_point, 'newfile') 76 | self.assertFalse('newfile' in os.listdir(self.mount_point)) 77 | 78 | f = open(new_file_path, 'wb') 79 | self.assertTrue('newfile' in os.listdir(self.mount_point)) 80 | self.assertEqual(os.stat(new_file_path).st_size, 0) 81 | self.assertEqual(open(new_file_path).read(), '') 82 | 83 | f.write('something here!') 84 | f.flush() 85 | self.assertEqual(os.stat(new_file_path).st_size, 15) 86 | self.assertEqual(open(new_file_path).read(), 'something here!') 87 | 88 | f.seek(10) 89 | f.write('there!') 90 | f.flush() 91 | self.assertEqual(os.stat(new_file_path).st_size, 16) 92 | self.assertEqual(open(new_file_path).read(), 'something there!') 93 | 94 | f.truncate(9) 95 | f.flush() 96 | self.assertEqual(os.stat(new_file_path).st_size, 9) 97 | self.assertEqual(open(new_file_path).read(), 'something') 
98 | 99 | f.seek(15) 100 | f.write('else') 101 | f.flush() 102 | self.assertEqual(os.stat(new_file_path).st_size, 19) 103 | self.assertEqual(open(new_file_path).read(), 'something\0\0\0\0\0\0else') 104 | 105 | def test_large_data(self): 106 | _64K = 64*1024 107 | _1M = 1024*1024 108 | test_file_path = path.join(self.mount_point, 'newfile2') 109 | test_data = randomdata(_1M) 110 | f = open(test_file_path, 'wb') 111 | for c in xrange(0, _1M, _64K): 112 | f.write(test_data[c:c+_64K]) 113 | f.close() 114 | 115 | f2 = open(test_file_path, 'rb') 116 | for c in xrange(0, _1M, _64K): 117 | d = f2.read(_64K) 118 | self.assertEqual(d, test_data[c:c+_64K]) 119 | f2.close() 120 | 121 | def test_unlink(self): 122 | new_file_path = path.join(self.mount_point, 'newfile') 123 | f = open(new_file_path, 'wb') 124 | f.write('hey') 125 | f.close() 126 | self.assertTrue('newfile' in os.listdir(self.mount_point)) 127 | os.unlink(new_file_path) 128 | self.assertFalse('newfile' in os.listdir(self.mount_point)) 129 | 130 | def test_mkdir_listdir_rmdir(self): 131 | new_dir_path = path.join(self.mount_point, 'newdir') 132 | self.assertFalse('newdir' in os.listdir(self.mount_point)) 133 | 134 | os.mkdir(new_dir_path) 135 | self.assertTrue('newdir' in os.listdir(self.mount_point)) 136 | self.assertEqual(os.listdir(new_dir_path), []) 137 | 138 | os.rmdir(new_dir_path) 139 | self.assertFalse('newdir' in os.listdir(self.mount_point)) 140 | 141 | def test_link(self): 142 | orig_path = path.join(self.mount_point, 'orig') 143 | linked_path = path.join(self.mount_point, 'linked') 144 | 145 | f = open(orig_path, 'wb') 146 | f.write('hey') 147 | f.close() 148 | self.assertEqual(os.stat(orig_path).st_nlink, 1) 149 | 150 | os.link(orig_path, linked_path) 151 | self.assertEqual(os.stat(orig_path).st_nlink, 2) 152 | # FUSE seems to mangle st_ino 153 | #self.assertEqual(os.stat(orig_path).st_ino, 154 | # os.stat(linked_path).st_ino) 155 | 156 | f = open(orig_path, 'wb') 157 | f.write('asdf') 158 | f.close() 
159 | f = open(linked_path, 'rb') 160 | linked_data = f.read() 161 | f.close() 162 | self.assertEqual(linked_data, 'asdf') 163 | 164 | os.unlink(orig_path) 165 | self.assertEqual(os.stat(linked_path).st_nlink, 1) 166 | 167 | def test_rename_file(self): 168 | orig_path = path.join(self.mount_point, 'orig') 169 | new_path = path.join(self.mount_point, 'linked') 170 | 171 | f = open(orig_path, 'wb') 172 | f.write('hey') 173 | f.close() 174 | self.assertEqual(os.stat(orig_path).st_nlink, 1) 175 | 176 | self.assertTrue(path.isfile(orig_path)) 177 | self.assertFalse(path.isfile(new_path)) 178 | 179 | os.rename(orig_path, new_path) 180 | 181 | self.assertFalse(path.isfile(orig_path)) 182 | self.assertTrue(path.isfile(new_path)) 183 | self.assertEqual(os.stat(new_path).st_nlink, 1) 184 | 185 | f = open(new_path, 'rb') 186 | data = f.read() 187 | f.close() 188 | self.assertEqual(data, 'hey') 189 | 190 | def test_not_permitted(self): 191 | myf_path = path.join(self.mount_point, 'myf') 192 | myf2_path = path.join(self.mount_point, 'myf2') 193 | 194 | os.mkdir(myf_path) 195 | 196 | try: 197 | os.rename(myf_path, myf2_path) 198 | except OSError, e: 199 | self.assertEqual(e.errno, EPERM) 200 | else: 201 | self.fail('OSError not raised') 202 | 203 | try: 204 | os.link(myf_path, myf2_path) 205 | except OSError, e: 206 | self.assertEqual(e.errno, EPERM) 207 | else: 208 | self.fail('OSError not raised') 209 | 210 | class FilesystemLoggingTestCase(unittest.TestCase): 211 | def test_custom_repr(self): 212 | from spaghettifs.filesystem import LogWrap 213 | self.assertEqual(repr(LogWrap('asdf')), repr('asdf')) 214 | self.assertEqual(repr(LogWrap('"')), repr('"')) 215 | self.assertEqual(repr(LogWrap('\'')), repr('\'')) 216 | self.assertEqual(repr(LogWrap(u'q')), repr(u'q')) 217 | self.assertEqual(repr(LogWrap('qwer'*64)), "'qwerqwerqw[...(len=256)]'") 218 | self.assertEqual(repr(LogWrap(u'asdf'*64)), "u'asdfasdfa[...(len=256)]'") 219 | self.assertEqual(repr(LogWrap(range(3))), '[0, 1, 
2]') 220 | self.assertEqual(repr(LogWrap(range(100))), repr(range(100))) 221 | 222 | if __name__ == '__main__': 223 | unittest.main() 224 | -------------------------------------------------------------------------------- /spaghettifs/easygit.py: -------------------------------------------------------------------------------- 1 | from time import time 2 | import weakref 3 | import logging 4 | import collections 5 | 6 | import dulwich 7 | 8 | log = logging.getLogger('spaghettifs.easygit') 9 | log.setLevel(logging.DEBUG) 10 | 11 | class EasyTree(object): 12 | is_tree = True 13 | 14 | def __init__(self, git_repo, git_id=None, parent=None, name=None): 15 | self.parent = parent 16 | self.name = name 17 | self.git = git_repo 18 | if git_id is None: 19 | log.debug('tree %r: creating blank git tree', self.name) 20 | git_tree = dulwich.objects.Tree() 21 | self.git.object_store.add_object(git_tree) 22 | git_id = git_tree.id 23 | log.debug('tree %r: loading git tree %r', self.name, git_id) 24 | self._git_tree = self.git.tree(git_id) 25 | self._ctx_count = 0 26 | self._loaded = dict() 27 | self._dirty = dict() 28 | 29 | def _set_dirty(self, name, value): 30 | log.debug('tree %r: setting dirty entry %r (%r)', 31 | self.name, name, value) 32 | if self.parent and not self._dirty: 33 | log.debug('tree %r: propagating "dirty" state', self.name) 34 | self.parent._set_dirty(self.name, self) 35 | self._dirty[name] = value 36 | 37 | def new_tree(self, name): 38 | log.debug('tree %r: creating child tree %r', self.name, name) 39 | t = EasyTree(self.git, None, self, name) 40 | self._set_dirty(name, t) 41 | return self[name] 42 | 43 | def new_blob(self, name): 44 | log.debug('tree %r: creating child blob %r', self.name, name) 45 | b = EasyBlob(self.git, None, self, name) 46 | self._set_dirty(name, b) 47 | return self[name] 48 | 49 | def clone(self, source, name): 50 | if isinstance(source, EasyTree): 51 | log.debug('tree %r: cloning tree %r as %r', 52 | self.name, source, name) 53 | cls = 
EasyTree 54 | elif isinstance(source, EasyBlob): 55 | log.debug('tree %r: cloning blob %r as %r', 56 | self.name, source, name) 57 | cls = EasyBlob 58 | else: 59 | raise NotImplementedError 60 | 61 | b = cls(self.git, source._commit(), self, name) 62 | self._set_dirty(name, b) 63 | return self[name] 64 | 65 | def __enter__(self): 66 | self._ctx_count += 1 67 | log.debug('tree %r: entering context manager (count=%d)', 68 | self.name, self._ctx_count) 69 | return self 70 | 71 | def __exit__(self, exc_type, exc_value, exc_traceback): 72 | log.debug('tree %r: exiting context manager (count=%d)', 73 | self.name, self._ctx_count) 74 | assert self._ctx_count > 0 75 | self._ctx_count -= 1 76 | 77 | def _commit(self): 78 | log.debug('tree %r: committing', self.name) 79 | assert self._ctx_count == 0 80 | 81 | for name, value in self._dirty.iteritems(): 82 | if value is None: 83 | log.debug('tree %r: removing entry %r', self.name, name) 84 | if name in self._git_tree: 85 | del self._git_tree[name] 86 | continue 87 | 88 | value_git_id = value._commit() 89 | if isinstance(value, EasyTree): 90 | log.debug('tree %r: updating tree %r', self.name, name) 91 | self._git_tree[name] = (040000, value_git_id) 92 | elif isinstance(value, EasyBlob): 93 | log.debug('tree %r: updating blob %r', self.name, name) 94 | self._git_tree[name] = (0100644, value_git_id) 95 | else: 96 | assert False 97 | 98 | self._dirty.clear() 99 | 100 | self.git.object_store.add_object(self._git_tree) 101 | git_id = self._git_tree.id 102 | log.debug('tree %r: finished commit, id=%r', self.name, git_id) 103 | return git_id 104 | 105 | def __getitem__(self, name): 106 | if name in self._loaded: 107 | value = self._loaded[name]() 108 | if value is None: 109 | log.debug('tree %r: weakref to %r has expired', 110 | self.name, name) 111 | del self._loaded[name] 112 | else: 113 | log.debug('tree %r: returning %r from cache', 114 | self.name, name) 115 | return value 116 | 117 | if name in self._dirty: 118 | value = 
self._dirty[name] 119 | if value is None: 120 | raise KeyError(name) 121 | log.debug('tree %r: returning %r from dirty', self.name, name) 122 | 123 | else: 124 | mode, child_git_id = self._git_tree[name] 125 | if mode == 040000: 126 | log.debug('tree %r: loading child tree %r', self.name, name) 127 | value = EasyTree(self.git, child_git_id, self, name) 128 | elif mode == 0100644: 129 | log.debug('tree %r: loading child blob %r', self.name, name) 130 | value = EasyBlob(self.git, child_git_id, self, name) 131 | else: 132 | raise ValueError('Unexpected mode %r' % mode) 133 | 134 | self._loaded[name] = weakref.ref(value) 135 | return value 136 | 137 | def __delitem__(self, name): 138 | self._set_dirty(name, None) 139 | if name in self._loaded: 140 | del self._loaded[name] 141 | 142 | def __iter__(self): 143 | for name in self.keys(): 144 | yield name 145 | 146 | def keys(self): 147 | names = set(name for name, e0, e1 in self._git_tree.iteritems()) 148 | names.update(set(self._dirty.iterkeys())) 149 | for name, value in self._dirty.iteritems(): 150 | if value is None: 151 | names.remove(name) 152 | 153 | return list(names) 154 | 155 | def remove(self): 156 | del self.parent[self.name] 157 | 158 | blob_cache = collections.deque(maxlen=10) 159 | 160 | class EasyBlob(object): 161 | is_tree = False 162 | _git_blob = None 163 | 164 | def __init__(self, git_repo, git_id=None, parent=None, name=None): 165 | blob_cache.append(self) 166 | self.parent = parent 167 | self.name = name 168 | self.git = git_repo 169 | if git_id is None: 170 | log.debug('blob %r: creating blank git blob', self.name) 171 | git_blob = dulwich.objects.Blob.from_string('') 172 | self.git.object_store.add_object(git_blob) 173 | git_id = git_blob.id 174 | log.debug('blob %r: loading git blob %r', self.name, git_id) 175 | self._git_id = git_id 176 | self._ctx_count = 0 177 | 178 | def __enter__(self): 179 | self._ctx_count += 1 180 | return self 181 | 182 | def __exit__(self, exc_type, exc_value, 
exc_traceback): 183 | assert self._ctx_count > 0 184 | self._ctx_count -= 1 185 | 186 | def _get_data(self): 187 | if self._git_blob is None: 188 | self._git_blob = self.git.get_blob(self._git_id) 189 | return self._git_blob.data 190 | 191 | def _set_data(self, value): 192 | log.debug('blob %r: updating value', self.name) 193 | self._git_id = None 194 | self._git_blob = dulwich.objects.Blob.from_string(value) 195 | self.parent._set_dirty(self.name, self) 196 | 197 | data = property(_get_data, _set_data) 198 | 199 | def remove(self): 200 | del self.parent[self.name] 201 | 202 | def _commit(self): 203 | assert self._ctx_count == 0 204 | 205 | if self._git_id is None: 206 | self.git.object_store.add_object(self._git_blob) 207 | self._git_id = self._git_blob.id 208 | del self._git_blob 209 | log.debug('blob %r: finished commit, id=%r', 210 | self.name, self._git_id) 211 | 212 | return self._git_id 213 | 214 | class EasyGit(object): 215 | def __init__(self, git_repo): 216 | self.git = git_repo 217 | try: 218 | git_commit_id = self.git.head() 219 | except: 220 | root_id = None 221 | else: 222 | git_commit = self.git.commit(self.git.head()) 223 | root_id = git_commit.tree 224 | 225 | self.root = EasyTree(self.git, root_id, None, '[ROOT]') 226 | 227 | def commit(self, author, message, parents=[], branch='master'): 228 | log.debug('easygit repo: starting commit') 229 | for parent_id in parents: 230 | assert self.git.commit(parent_id) 231 | 232 | root_git_id = self.root._commit() 233 | 234 | commit_time = int(time()) 235 | 236 | git_commit = dulwich.objects.Commit() 237 | git_commit.commit_time = commit_time 238 | git_commit.author_time = commit_time 239 | git_commit.commit_timezone = 2*60*60 240 | git_commit.author_timezone = 2*60*60 241 | git_commit.author = author 242 | git_commit.committer = author 243 | git_commit.message = message 244 | git_commit.encoding = "UTF-8" 245 | git_commit.tree = root_git_id 246 | git_commit.parents = parents 247 | 248 | 
self.git.object_store.add_object(git_commit) 249 | self.git.refs['refs/heads/%s' % branch] = git_commit.id 250 | log.debug('easygit repo: finished commit, id=%r', git_commit.id) 251 | 252 | def get_head_id(self, name="master"): 253 | return self.git.refs['refs/heads/%s' % name] 254 | 255 | @classmethod 256 | def new_repo(cls, repo_path, bare=False): 257 | log.debug('easygit creating repository at %r', repo_path) 258 | assert bare is True 259 | git_repo = dulwich.repo.Repo.init_bare(repo_path) 260 | return cls(git_repo) 261 | 262 | @classmethod 263 | def open_repo(cls, repo_path): 264 | log.debug('easygit opening repository at %r', repo_path) 265 | git_repo = dulwich.repo.Repo(repo_path) 266 | return cls(git_repo) 267 | -------------------------------------------------------------------------------- /spaghettifs/tests/test_easygit.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import tempfile 3 | import shutil 4 | import os 5 | from time import time 6 | 7 | import dulwich 8 | from support import setup_logger 9 | from spaghettifs.easygit import EasyGit 10 | 11 | class BasicTestCase(unittest.TestCase): 12 | def setUp(self): 13 | self.repo_path = tempfile.mkdtemp() 14 | self.eg = EasyGit.new_repo(self.repo_path, bare=True) 15 | 16 | def tearDown(self): 17 | shutil.rmtree(self.repo_path) 18 | 19 | def test_init(self): 20 | expected_items = ['branches', 'config', 'objects', 'refs'] 21 | self.assertTrue(set(os.listdir(self.repo_path)), set(expected_items)) 22 | 23 | def test_initial_commit(self): 24 | self.eg.commit(author="Spaghetti User ", 25 | message="initial test commit") 26 | 27 | git = dulwich.repo.Repo(self.repo_path) 28 | git_h = git.head() 29 | git_c = git.commit(git_h) 30 | self.assertEqual(git_c.author, "Spaghetti User ") 31 | self.assertEqual(git_c.message, "initial test commit") 32 | self.assertEqual(git_c.get_parents(), []) 33 | 34 | def test_commit_with_ancestors(self): 35 | 
self.eg.commit(author="Spaghetti User ", 36 | message="initial test commit") 37 | 38 | head_id = self.eg.get_head_id() 39 | 40 | self.eg.commit(author="Spaghetti User ", 41 | message="second test commit", 42 | parents=[head_id]) 43 | 44 | self.assertRaises(AssertionError, self.eg.commit, 45 | author="Sneaky ", 46 | message="bad test commit", 47 | parents=['asdf']) 48 | 49 | git = dulwich.repo.Repo(self.repo_path) 50 | git_h = git.head() 51 | git_c2 = git.commit(git_h) 52 | self.assertEqual(len(git_c2.get_parents()), 1) 53 | git_c1 = git.commit(git_c2.get_parents()[0]) 54 | self.assertEqual(git_c1.get_parents(), []) 55 | 56 | def test_commit_with_tree(self): 57 | t1 = self.eg.root 58 | t2 = t1.new_tree('t2') 59 | self.eg.commit(author="Spaghetti User ", 60 | message="test commit with tree") 61 | 62 | git = dulwich.repo.Repo(self.repo_path) 63 | git_t = git.tree(git.commit(git.head()).tree) 64 | self.assertEqual(len(git_t.entries()), 1) 65 | self.assertEqual(git_t.entries()[0][:2], (040000, 't2')) 66 | 67 | def test_commit_with_blob(self): 68 | t1 = self.eg.root 69 | b1 = t1.new_blob('b1') 70 | with b1: 71 | b1.data = 'hello blob!' 
72 | self.eg.commit(author="Spaghetti User ", 73 | message="test commit with blob") 74 | 75 | git = dulwich.repo.Repo(self.repo_path) 76 | git_t = git.tree(git.commit(git.head()).tree) 77 | self.assertEqual(len(git_t.entries()), 1) 78 | self.assertEqual(git_t.entries()[0][:2], (0100644, 'b1')) 79 | git_b = git.get_blob(git_t['b1'][1]) 80 | self.assertEqual(git_b.data, "hello blob!") 81 | 82 | def test_clone(self): 83 | t1 = self.eg.root 84 | left = t1.new_tree('left') 85 | 86 | left_blob = left.new_blob('blob') 87 | left_blob.data = 'left_blob orig data' 88 | left_tree = left.new_tree('tree') 89 | left_tree_blob = left_tree.new_blob('blob') 90 | left_tree_blob.data = 'left_tree_blob orig data' 91 | 92 | right = t1.new_tree('right') 93 | right_blob = right.clone(left_blob, 'blob') 94 | left_blob.data = 'left_blob other data' 95 | right_tree = right.clone(left_tree, 'tree') 96 | left_tree_blob.data = 'left_tree_blob other data' 97 | 98 | self.assertEqual(left_blob.data, 'left_blob other data') 99 | self.assertEqual(right_blob.data, 'left_blob orig data') 100 | self.assertEqual(left_tree_blob.data, 'left_tree_blob other data') 101 | self.assertEqual(right_tree['blob'].data, 'left_tree_blob orig data') 102 | 103 | self.eg.commit(author="Spaghetti User ", 104 | message="test commit") 105 | 106 | eg2 = EasyGit.open_repo(self.repo_path) 107 | self.assertEqual(eg2.root['left']['blob'].data, 'left_blob other data') 108 | self.assertEqual(eg2.root['right']['blob'].data, 'left_blob orig data') 109 | self.assertEqual(eg2.root['left']['tree']['blob'].data, 110 | 'left_tree_blob other data') 111 | self.assertEqual(eg2.root['right']['tree']['blob'].data, 112 | 'left_tree_blob orig data') 113 | 114 | class RetrievalTestCase(unittest.TestCase): 115 | def setUp(self): 116 | self.repo_path = tempfile.mkdtemp() 117 | git = dulwich.repo.Repo.init_bare(self.repo_path) 118 | git_t1 = dulwich.objects.Tree() 119 | git_t2 = dulwich.objects.Tree() 120 | git_b1 = 
dulwich.objects.Blob.from_string('b1 data') 121 | git_b2 = dulwich.objects.Blob.from_string('b2 data') 122 | git.object_store.add_object(git_b1) 123 | git.object_store.add_object(git_b2) 124 | git_t2['b2'] = (0100644, git_b2.id) 125 | git.object_store.add_object(git_t2) 126 | git_t1['b1'] = (0100644, git_b1.id) 127 | git_t1['t2'] = (040000, git_t2.id) 128 | git.object_store.add_object(git_t1) 129 | 130 | commit_time = int(time()) 131 | git_c = dulwich.objects.Commit() 132 | git_c.commit_time = commit_time 133 | git_c.author_time = commit_time 134 | git_c.commit_timezone = 2*60*60 135 | git_c.author_timezone = 2*60*60 136 | git_c.author = "Spaghetti User " 137 | git_c.committer = git_c.author 138 | git_c.message = "test fixture" 139 | git_c.encoding = "UTF-8" 140 | git_c.tree = git_t1.id 141 | git.object_store.add_object(git_c) 142 | git.refs['refs/heads/master'] = git_c.id 143 | 144 | self.eg = EasyGit.open_repo(self.repo_path) 145 | 146 | def tearDown(self): 147 | shutil.rmtree(self.repo_path) 148 | 149 | def test_fetch_objects(self): 150 | t1 = self.eg.root 151 | self.assertEqual(set(t1.keys()), set(['b1', 't2'])) 152 | b1 = t1['b1'] 153 | self.assertEqual(b1.data, 'b1 data') 154 | t2 = t1['t2'] 155 | self.assertEqual(set(t2.keys()), set(['b2'])) 156 | b2 = t2['b2'] 157 | self.assertEqual(b2.data, 'b2 data') 158 | 159 | def test_modify_tree(self): 160 | t1 = self.eg.root 161 | with t1['t2'] as t2: 162 | b3 = t2.new_blob('b3') 163 | with b3: 164 | b3.data = 'asdf' 165 | self.assertEqual(set(t2.keys()), set(['b2', 'b3'])) 166 | self.assertEqual(t2['b3'].data, 'asdf') 167 | self.eg.commit(author="Spaghetti User ", 168 | message="propagating changes") 169 | 170 | eg2 = EasyGit.open_repo(self.repo_path) 171 | self.assertEqual(eg2.root['t2']['b3'].data, 'asdf') 172 | 173 | def test_modify_blob(self): 174 | t1 = self.eg.root 175 | with t1['t2']['b2'] as b2: 176 | self.assertNotEqual(b2.data, 'qwer') 177 | b2.data = 'qwer' 178 | self.assertEqual(b2.data, 'qwer') 179 | 
180 | with t1['t2']['b2'] as b2: 181 | self.assertEqual(b2.data, 'qwer') 182 | 183 | self.eg.commit(author="Spaghetti User ", 184 | message="propagating changes") 185 | 186 | eg2 = EasyGit.open_repo(self.repo_path) 187 | self.assertEqual(eg2.root['t2']['b2'].data, 'qwer') 188 | 189 | def test_modify_multiple(self): 190 | with self.eg.root as root: 191 | root['t2']['b2'].data = 'new b2' 192 | with root.new_tree('t3') as t3: 193 | t3.new_blob('b3').data = 'new b3' 194 | t3.new_blob('b4').data = 'new b4' 195 | with root.new_tree('t4') as t4: 196 | t4.new_blob('b5').data = 'new b5' 197 | root.new_blob('b6').data = 'new b6' 198 | self.eg.commit(author="Spaghetti User ", 199 | message="multiple changes") 200 | 201 | eg2 = EasyGit.open_repo(self.repo_path) 202 | root2 = eg2.root 203 | self.assertEqual(set(root2.keys()), 204 | set(['b1', 't2', 't3', 't4', 'b6'])) 205 | self.assertEqual(root2['b1'].data, 'b1 data') 206 | self.assertEqual(set(root2['t2'].keys()), set(['b2'])) 207 | self.assertEqual(root2['t2']['b2'].data, 'new b2') 208 | self.assertEqual(set(root2['t3'].keys()), set(['b3', 'b4'])) 209 | self.assertEqual(root2['t3']['b3'].data, 'new b3') 210 | self.assertEqual(root2['t3']['b4'].data, 'new b4') 211 | self.assertEqual(set(root2['t4'].keys()), set(['b5'])) 212 | self.assertEqual(root2['t4']['b5'].data, 'new b5') 213 | self.assertEqual(root2['b6'].data, 'new b6') 214 | 215 | def test_child_cache(self): 216 | root = self.eg.root 217 | 218 | t2a = root['t2'] 219 | t2b = root['t2'] 220 | self.assertTrue(t2a is t2b) 221 | 222 | t2a['b2'].data = 'asdf' 223 | self.assertEqual(t2b['b2'].data, 'asdf') 224 | 225 | b3a = t2a.new_blob('b3') 226 | b3a.data = 'b3 data' 227 | b3b = t2b['b3'] 228 | self.assertTrue(b3a is b3b) 229 | self.assertEqual(b3b.data, 'b3 data') 230 | 231 | t3a = t2a.new_tree('t3') 232 | t3a.new_blob('b4') 233 | t3b = t2b['t3'] 234 | self.assertTrue(t3a is t3b) 235 | self.assertEqual(set(t3b.keys()), set(['b4'])) 236 | 237 | def test_remove_entry(self): 
238 | with self.eg.root as t1: 239 | with t1['t2'] as t2: 240 | del t2['b2'] 241 | del t1['b1'] 242 | 243 | self.eg.commit(author="Spaghetti User ", 244 | message="removing entries") 245 | 246 | eg2 = EasyGit.open_repo(self.repo_path) 247 | self.assertEqual(eg2.root.keys(), ['t2']) 248 | self.assertEqual(eg2.root['t2'].keys(), []) 249 | 250 | def test_self_remove_entry(self): 251 | with self.eg.root as t1: 252 | t1['t2'].remove() 253 | t1['b1'].remove() 254 | 255 | self.eg.commit(author="Spaghetti User ", 256 | message="removing entries") 257 | 258 | eg2 = EasyGit.open_repo(self.repo_path) 259 | self.assertEqual(eg2.root.keys(), []) 260 | 261 | def test_remove_and_fetch_entry(self): 262 | t1 = self.eg.root 263 | t2 = t1['t2'] 264 | t1['t2'].remove() 265 | self.assertRaises(KeyError, t1.__getitem__, 't2') 266 | 267 | class DelayedCommit(unittest.TestCase): 268 | def setUp(self): 269 | self.repo_path = tempfile.mkdtemp() 270 | self.eg = EasyGit.new_repo(self.repo_path, bare=True) 271 | 272 | def tearDown(self): 273 | shutil.rmtree(self.repo_path) 274 | 275 | def test_create_remove_blob(self): 276 | r = self.eg.root 277 | 278 | r.new_blob('b') 279 | del r['b'] 280 | r._commit() 281 | 282 | r.new_tree('t') 283 | del r['t'] 284 | r._commit() 285 | 286 | r.new_tree('t') 287 | r['t'].new_blob('b') 288 | r['t']['b'].data = 'asdf' 289 | del r['t'] 290 | r._commit() 291 | 292 | 293 | class ContextTestCase(unittest.TestCase): 294 | def setUp(self): 295 | self.repo_path = tempfile.mkdtemp() 296 | self.eg = EasyGit.new_repo(self.repo_path, bare=True) 297 | 298 | def tearDown(self): 299 | shutil.rmtree(self.repo_path) 300 | 301 | def test_nested(self): 302 | r = self.eg.root 303 | with r: 304 | with r: 305 | with r: 306 | self.assertEqual(r._ctx_count, 3) 307 | self.assertEqual(r._ctx_count, 2) 308 | self.assertEqual(r._ctx_count, 1) 309 | self.assertEqual(r._ctx_count, 0) 310 | self.assertRaises(AssertionError, r.__exit__, None, None, None) 311 | 312 | b = r.new_blob('b') 313 | 
with b: 314 | with b: 315 | with b: 316 | self.assertEqual(b._ctx_count, 3) 317 | self.assertEqual(b._ctx_count, 2) 318 | self.assertEqual(b._ctx_count, 1) 319 | self.assertEqual(b._ctx_count, 0) 320 | self.assertRaises(AssertionError, b.__exit__, None, None, None) 321 | 322 | def test_no_with(self): 323 | r = self.eg.root 324 | b = r.new_blob('b') 325 | b.data = 'asdf' 326 | 327 | class BranchTestCase(unittest.TestCase): 328 | def setUp(self): 329 | self.repo_path = tempfile.mkdtemp() 330 | self.eg = EasyGit.new_repo(self.repo_path, bare=True) 331 | 332 | def tearDown(self): 333 | shutil.rmtree(self.repo_path) 334 | 335 | def get_head(self, name): 336 | ref_path = os.path.join(self.repo_path, 'refs/heads', name) 337 | with open(ref_path, 'rb') as f: 338 | return f.read().strip() 339 | 340 | def test_various_commits(self): 341 | with self.eg.root as r: 342 | r.new_blob('bl').data = 'asdf' 343 | self.eg.commit(author="Spaghetti User ", 344 | message="commit on master") 345 | head1 = self.eg.get_head_id() 346 | self.assertEqual(head1, self.get_head('master')) 347 | 348 | with self.eg.root as r: 349 | r['bl'].data = 'qwer' 350 | self.eg.commit(author="Spaghetti User ", 351 | message="commit on secondary", 352 | branch="secondary") 353 | head2 = self.eg.get_head_id('secondary') 354 | self.assertEqual(head1, self.get_head('master')) 355 | self.assertEqual(head2, self.get_head('secondary')) 356 | 357 | if __name__ == '__main__': 358 | setup_logger('ERROR') 359 | unittest.main() 360 | -------------------------------------------------------------------------------- /spaghettifs/tests/test_storage.py: -------------------------------------------------------------------------------- 1 | from os import path 2 | import unittest 3 | import tempfile 4 | import shutil 5 | import random 6 | import json 7 | 8 | import dulwich 9 | 10 | from support import SpaghettiTestCase, setup_logger, randomdata 11 | from spaghettifs.storage import GitStorage, FeatureBlob 12 | from spaghettifs 
import treetree 13 | 14 | class BackendTestCase(SpaghettiTestCase): 15 | def test_walk(self): 16 | root = self.repo.get_root() 17 | self.assertTrue(root.is_dir) 18 | self.assertEqual(set(root.keys()), set(['a.txt', 'b'])) 19 | self.assertRaises(KeyError, lambda: root['nonexistent']) 20 | 21 | a_txt = root['a.txt'] 22 | self.assertFalse(a_txt.is_dir) 23 | self.assertEqual(a_txt.name, 'a.txt') 24 | self.assertEqual(a_txt.size, 14) 25 | self.assertEqual(a_txt._read_all_data(), 'text file "a"\n') 26 | self.assertEqual(a_txt.path, '/a.txt') 27 | 28 | b = root['b'] 29 | self.assertTrue(b.is_dir) 30 | self.assertEqual(set(b.keys()), set(['c', 'f.txt'])) 31 | self.assertEqual(b.path, '/b/') 32 | 33 | c = b['c'] 34 | self.assertTrue(c.is_dir) 35 | self.assertEqual(set(c.keys()), set(['d.txt', 'e.txt'])) 36 | self.assertEqual(c.path, '/b/c/') 37 | 38 | d = c['d.txt'] 39 | self.assertEqual(d.path, '/b/c/d.txt') 40 | 41 | def test_create_file(self): 42 | b = self.repo.get_root()['b'] 43 | g_txt = b.create_file('g.txt') 44 | self.assertFalse(g_txt.is_dir) 45 | self.assertEqual(g_txt.size, 0) 46 | self.assertEqual(g_txt._read_all_data(), '') 47 | self.assertEqual(g_txt.name, 'g.txt') 48 | 49 | repo2 = GitStorage(self.repo_path) 50 | g_txt_2 = repo2.get_root()['b']['g.txt'] 51 | self.assertFalse(g_txt_2.is_dir) 52 | self.assertEqual(g_txt_2.size, 0) 53 | self.assertEqual(g_txt_2._read_all_data(), '') 54 | self.assertEqual(g_txt_2.name, 'g.txt') 55 | 56 | def test_write_file_data(self): 57 | def assert_git_contents(data): 58 | repo2 = GitStorage(self.repo_path) 59 | h_txt_2 = repo2.get_root()['b']['h.txt'] 60 | self.assertEqual(h_txt_2.size, len(data)) 61 | self.assertEqual(h_txt_2._read_all_data(), data) 62 | 63 | b = self.repo.get_root()['b'] 64 | h_txt = b.create_file('h.txt') 65 | h_txt.write_data('hello git!', 0) 66 | self.assertEqual(h_txt.size, 10) 67 | self.assertEqual(h_txt._read_all_data(), 'hello git!') 68 | assert_git_contents('hello git!') 69 | 70 | 
h_txt.write_data(':)', 13) 71 | self.assertEqual(h_txt.size, 15) 72 | self.assertEqual(h_txt._read_all_data(), 'hello git!\0\0\0:)') 73 | assert_git_contents('hello git!\0\0\0:)') 74 | 75 | h_txt.truncate(17) 76 | self.assertEqual(h_txt.size, 17) 77 | self.assertEqual(h_txt._read_all_data(), 'hello git!\0\0\0:)\0\0') 78 | assert_git_contents('hello git!\0\0\0:)\0\0') 79 | 80 | h_txt.truncate(5) 81 | self.assertEqual(h_txt.size, 5) 82 | self.assertEqual(h_txt._read_all_data(), 'hello') 83 | assert_git_contents('hello') 84 | 85 | h_txt.write_data('-there', 5) 86 | self.assertEqual(h_txt.size, 11) 87 | self.assertEqual(h_txt._read_all_data(), 'hello-there') 88 | assert_git_contents('hello-there') 89 | 90 | def test_remove_file(self): 91 | c = self.repo.get_root()['b']['c'] 92 | self.assertEqual(set(c.keys()), set(['d.txt', 'e.txt'])) 93 | 94 | d_txt = c['d.txt'] 95 | d_txt.unlink() 96 | self.assertEqual(set(c.keys()), set(['e.txt'])) 97 | 98 | repo2 = GitStorage(self.repo_path) 99 | c_2 = repo2.get_root()['b']['c'] 100 | self.assertEqual(set(c_2.keys()), set(['e.txt'])) 101 | 102 | def test_make_directory(self): 103 | c = self.repo.get_root()['b']['c'] 104 | x = c.create_directory('x') 105 | self.assertEqual(set(c.keys()), set(['d.txt', 'e.txt', 'x'])) 106 | 107 | repo2 = GitStorage(self.repo_path) 108 | c_2 = repo2.get_root()['b']['c'] 109 | self.assertEqual(set(c_2.keys()), set(['d.txt', 'e.txt', 'x'])) 110 | 111 | y = x.create_file('y') 112 | y.write_data('ydata', 0) 113 | self.assertEqual(set(x.keys()), set(['y'])) 114 | self.assertEqual(y._read_all_data(), 'ydata') 115 | 116 | repo3 = GitStorage(self.repo_path) 117 | c_3 = repo3.get_root()['b']['c'] 118 | self.assertEqual(set(c_3.keys()), set(['d.txt', 'e.txt', 'x'])) 119 | x_3 = c_3['x'] 120 | self.assertEqual(set(x_3.keys()), set(['y'])) 121 | y_3 = x_3['y'] 122 | self.assertEqual(y_3._read_all_data(), 'ydata') 123 | 124 | x.unlink() 125 | 126 | repo4 = GitStorage(self.repo_path) 127 | c_4 = 
repo4.get_root()['b']['c'] 128 | self.assertEqual(set(c_4.keys()), set(['d.txt', 'e.txt'])) 129 | 130 | def test_empty_directory(self): 131 | c = self.repo.get_root()['b']['c'] 132 | x = c.create_directory('x') 133 | x.create_file('f') 134 | x['f'].unlink() 135 | self.assertEqual(set(x.keys()), set()) 136 | 137 | def test_30_files(self): 138 | b = self.repo.get_root()['b'] 139 | g = b.create_directory('g') 140 | for c in xrange(30): 141 | f = g.create_file('f_%d' % c) 142 | f.write_data('file contents %d' % c, 0) 143 | 144 | repo2 = GitStorage(self.repo_path) 145 | g2 = repo2.get_root()['b']['g'] 146 | for c in xrange(30): 147 | f2 = g2['f_%d' % c] 148 | self.assertEqual(f2._read_all_data(), 'file contents %d' % c) 149 | 150 | def test_dangerous_filenames(self): 151 | g = self.repo.get_root()['b'].create_directory('g') 152 | h = self.repo.get_root()['b'].create_directory('h') 153 | fail_names = ['.', '..', '/', '', 'as/df', 'x'*256] 154 | ok_names = [' ', 'ab ', ' cd', 'as\0df', 'qwe\tr', 'zc\nvb', '"', "'", 155 | '(', ')', '-', '+', '\\', '=', '?', '*', '.x', '..x', 156 | 'x'*255] 157 | 158 | for name in fail_names: 159 | self.assertRaises(ValueError, g.create_file, name) 160 | self.assertRaises(ValueError, h.create_directory, name) 161 | 162 | for name in ok_names: 163 | g.create_file(name).write_data(repr(name*2), 0) 164 | d = h.create_directory(name) 165 | d.create_file('afile') 166 | d.create_directory('adir') 167 | 168 | self.assertEqual(set(ok_names), set(g.keys())) 169 | self.assertEqual(set(ok_names), set(h.keys())) 170 | for name in ok_names: 171 | self.assertEqual(g[name]._read_all_data(), repr(name*2)) 172 | g[name].unlink() 173 | self.assertEqual(set(h[name].keys()), set(['afile', 'adir'])) 174 | h[name]['afile'].unlink() 175 | h[name]['adir'].unlink() 176 | self.assertEqual(list(h[name].keys()), []) 177 | h[name].unlink() 178 | 179 | self.assertEqual(list(h.keys()), []) 180 | 181 | def test_read_past_eof(self): 182 | a_txt = 
self.repo.get_root()['a.txt']

        # Reading past EOF must not raise; it returns whatever is available.
        try:
            data = a_txt.read_data(0, 1024)
        except Exception, e:
            self.fail('read past EOF raised %r' % e)
        self.assertEqual(data, 'text file "a"\n')

        # Reading entirely beyond EOF returns the empty string.
        try:
            data = a_txt.read_data(500, 100)
        except Exception, e:
            self.fail('read past EOF raised %r' % e)
        self.assertEqual(data, '')

    def test_hardlink(self):
        """ Two directory entries sharing an inode must stay in sync. """
        root = self.repo.get_root()
        a = root['a.txt']
        self.assertEqual(a.inode['nlink'], 1)

        linked_a = root['b'].link_file('linked_a.txt', a)
        self.assertTrue(id(a.inode) == id(linked_a.inode),
                        "different inodes for `a` and `linked_a`")
        self.assertEqual(a.inode['nlink'], 2)

        # a write through one name is visible through the other
        a.write_data('new data for text file "a"', 0)
        self.assertEqual(linked_a._read_all_data(),
                         'new data for text file "a"')

        # a freshly-opened storage must see the same shared inode
        repo2 = GitStorage(self.repo_path)
        a_2 = repo2.get_root()['a.txt']
        linked_a_2 = repo2.get_root()['b']['linked_a.txt']
        self.assertTrue(id(a_2.inode) == id(linked_a_2.inode))
        self.assertEqual(a_2.inode['nlink'], 2)
        self.assertEqual(linked_a_2._read_all_data(),
                         'new data for text file "a"')

        # unlinking one name decrements nlink but keeps the inode alive
        a.unlink()
        self.assertEqual(linked_a.inode['nlink'], 1)

        inode_name = linked_a.inode.name
        inodes_tt = treetree.TreeTree(self.repo.eg.root['inodes'], prefix='it')
        self.assertTrue(inode_name[1:] in inodes_tt)
        try:
            self.repo.get_inode(inode_name)
        except KeyError:
            self.fail()

        # unlinking the last name removes the inode from storage
        linked_a.unlink()
        self.assertFalse(inode_name in self.repo.eg.root['inodes'])
        self.assertRaises(KeyError, self.repo.get_inode, inode_name)

class LargeFileTestCase(SpaghettiTestCase):
    # shared random payload; 1 MB spans several 64 KB blocks
    large_data = randomdata(1024 * 1024) # 1 MB

    def assert_file_contents(self, reference):
        """ Re-open the repository and compare `/b/f` with `reference`. """
        repo2 = GitStorage(self.repo_path)
        f = repo2.get_root()['b']['f']
        self.assertEqual(f._read_all_data(), reference,
                         '`f._read_all_data()` and `reference` do not match')

    def test_store(self):
        f = self.repo.get_root()['b'].create_file('f')
        f.write_data(self.large_data, 0)
        self.assert_file_contents(self.large_data)

    def test_write_chunks(self):
        # sequential writes, chunk size equal to the storage block size
        f = self.repo.get_root()['b'].create_file('f')
        block_size = 64*1024 # 64 KB
        for offset in xrange(0, len(self.large_data), block_size):
            f.write_data(self.large_data[offset:offset + block_size], offset)
        self.assert_file_contents(self.large_data)

    def test_write_random(self):
        # chunks written in shuffled order; 39 KB does not divide 64 KB
        f = self.repo.get_root()['b'].create_file('f')
        block_size = 39 * 1024 # 39 KB
        offsets = range(0, len(self.large_data), block_size)
        random.shuffle(offsets)
        for offset in offsets:
            f.write_data(self.large_data[offset:offset + block_size], offset)
        self.assert_file_contents(self.large_data)

    def test_truncate(self):
        f = self.repo.get_root()['b'].create_file('f')
        f.write_data(self.large_data[:877*1024], 0)
        f.truncate(400*1024)
        self.assert_file_contents(self.large_data[:400*1024])
        f.write_data(self.large_data[400*1024:], 400*1024)
        self.assert_file_contents(self.large_data)

    def test_write_at_boundaries(self):
        # TODO: don't assume a 64 KB block size
        kb64 = 64*1024
        b = self.repo.get_root()['b']

        # writes at offset 0 ending just below / on / just above the
        # first block boundary
        f = b.create_file('f')
        f.write_data('', 0)
        self.assert_file_contents('')
        f.unlink()

        f = b.create_file('f')
        f.write_data(self.large_data[:kb64-1], 0)
        self.assert_file_contents(self.large_data[:kb64-1])
        f.unlink()

        f = b.create_file('f')
        f.write_data(self.large_data[:kb64], 0)
        self.assert_file_contents(self.large_data[:kb64])
        f.unlink()

        f = b.create_file('f')
        f.write_data(self.large_data[:kb64+1], 0)
        self.assert_file_contents(self.large_data[:kb64+1])
        f.unlink()

        # sparse writes: the gap before the offset must read as zeroes
        f = b.create_file('f')
        f.write_data('', kb64)
        self.assert_file_contents('\0' * kb64)
        f.unlink()

        f = b.create_file('f')
        f.write_data('', 3*kb64 + 500)
        self.assert_file_contents('\0' * (3 * kb64 + 500))
        f.unlink()

        f = b.create_file('f')
        f.write_data('x', kb64 - 1)
        self.assert_file_contents('\0' * (kb64 - 1) + 'x')
        f.unlink()

        f = b.create_file('f')
        f.write_data('x', kb64)
        self.assert_file_contents('\0' * (kb64) + 'x')
        f.unlink()

        # empty overwrites inside an existing file must change nothing
        f = b.create_file('f')
        f.write_data('_' * 10 * kb64, 0)
        f.write_data('', 3*kb64)
        self.assert_file_contents('_' * 10*kb64)
        f.unlink()

        f = b.create_file('f')
        f.write_data('_' * 10 * kb64, 0)
        f.write_data('', 3*kb64-1)
        self.assert_file_contents('_' * 10*kb64)
        f.unlink()

        f = b.create_file('f')
        f.write_data('_' * 10 * kb64, 0)
        f.write_data('', 3*kb64+1)
        self.assert_file_contents('_' * 10*kb64)
        f.unlink()

        # single-byte overwrites just below / on / just above the third
        # block boundary
        f = b.create_file('f')
        f.write_data('_' * 10 * kb64, 0)
        f.write_data('x', 3*kb64 - 1)
        self.assert_file_contents('_' * (3*kb64-1) + 'x' + '_' * (7*kb64))
        f.unlink()

        f = b.create_file('f')
        f.write_data('_' * 10 * kb64, 0)
        f.write_data('x', 3*kb64)
        self.assert_file_contents('_' * (3*kb64) + 'x' + '_' * (7*kb64-1))
        f.unlink()

        f = b.create_file('f')
        f.write_data('_' * 10 * kb64, 0)
        f.write_data('x', 3*kb64+1)
        self.assert_file_contents('_' * (3*kb64+1) + 'x' + '_' * (7*kb64-2))
        f.unlink()

        # a two-byte overwrite spanning the block boundary
        f = b.create_file('f')
        f.write_data('_' * 10 * kb64, 0)
        f.write_data('xy', 3*kb64-1)
        self.assert_file_contents('_' * (3*kb64-1) + 'xy' + '_' * (7*kb64-1))
        f.unlink()

class InodeMetaTestCase(SpaghettiTestCase):
    def test_read(self):
        # default metadata as created by the test fixture
        a = self.repo.get_root()['a.txt']
        self.assertEqual(a.inode['mode'], 0100644)
        self.assertEqual(a.inode['nlink'], 1)
        self.assertEqual(a.inode['uid'], 0)
        self.assertEqual(a.inode['gid'], 0)

    def test_write(self):
        a = self.repo.get_root()['a.txt']
        a.inode['mode'] = 0100755
        a.inode['uid'] = 1000

        # metadata changes must survive a re-open of the storage
        repo2 = GitStorage(self.repo_path)
        a_2 = repo2.get_root()['a.txt']
        self.assertEqual(a_2.inode['mode'], 0100755)
        self.assertEqual(a_2.inode['uid'], 1000)

class GitStructureTestCase(SpaghettiTestCase):
    def test_commit_chain(self):
        def assert_head_ancestor(repo, ancestor_id):
            # walk first-parent links from HEAD until we hit `ancestor_id`
            commit = repo.commit(repo.head())
            while True:
                try:
                    commit_id = commit.get_parents()[0]
                except IndexError:
                    self.fail('ancestor not in history of current head')

                if commit_id == ancestor_id:
                    return # we found the ancestor; test successful

                commit = repo.commit(commit_id)

        c = self.repo.get_root()['b']['c']
        HEAD_0 = dulwich.repo.Repo(self.repo_path).head()
        c.create_directory('x')

        # every filesystem operation must commit on top of the old head
        repo = dulwich.repo.Repo(self.repo_path)
        HEAD_1 = repo.head()
        assert_head_ancestor(repo, HEAD_0)

        c['x'].create_file('f')
        repo = dulwich.repo.Repo(self.repo_path)
        assert_head_ancestor(repo, HEAD_1)

class RepoInitTestCase(unittest.TestCase):
    def setUp(self):
        self.tmpdir = tempfile.mkdtemp()
        self.repo_path = path.join(self.tmpdir, 'test.sfs')

    def tearDown(self):
        shutil.rmtree(self.tmpdir)

    def test_create_repo(self):
        repo = GitStorage.create(self.repo_path)

        # a fresh repository holds exactly: inodes, root.ls, root.sub,
        # features
        git = dulwich.repo.Repo(self.repo_path)
        commit_tree = git.tree(git.commit(git.head()).tree)
        self.assertEqual(len(commit_tree.entries()), 4)

        inodes_tree = git.tree(commit_tree['inodes'][1])
        self.assertEqual(len(inodes_tree), 0)

        root_ls_blob = git.get_blob(commit_tree['root.ls'][1])
        self.assertEqual(root_ls_blob.data, '')

        root_sub_tree = git.tree(commit_tree['root.sub'][1])
        self.assertEqual(len(root_sub_tree.entries()), 0)

        features_blob = git.get_blob(commit_tree['features'][1])
        features_dict = json.loads(features_blob.data)
        self.assertEqual(features_dict['next_inode_number'], 1)

    def test_create_first_objects(self):
        repo = GitStorage.create(self.repo_path)
        root = repo.get_root()

        root.create_directory('some_folder')
        repo.commit('created "some folder"')
        repo2 = GitStorage(self.repo_path)
        self.assertEqual(set(repo2.get_root().keys()),
                         set(['some_folder']))
        self.assertEqual(set(repo2.get_root()['some_folder'].keys()), set())

        # the very first file must get inode number 1
        f = root.create_file('some_file')
        self.assertEqual(f.inode.name, 'i1')
        f.write_data('xy', 0)
        repo.commit('created "some file"')
        repo2 = GitStorage(self.repo_path)
        self.assertEqual(set(repo2.get_root().keys()),
                         set(['some_folder', 'some_file']))
        self.assertEqual(repo2.get_root()['some_file']._read_all_data(), 'xy')

class MockBlob(object):
    """ Minimal stand-in for an easygit blob: just a `data` attribute. """
    def __init__(self, data):
        self.data = data

class FeaturesTestCase(unittest.TestCase):
    def test_read(self):
        features = FeatureBlob(MockBlob('{"a": 13}'))
        self.assertEqual(features['a'], 13)
        self.assertEqual(features.get('a'), 13)
        self.assertRaises(KeyError, lambda: features['b'])
        # NOTE: unlike dict.get, FeatureBlob.get raises KeyError when no
        # default is supplied
        self.assertRaises(KeyError, lambda: features.get('b'))
        self.assertEqual(features.get('b', 'x'), 'x')

    def test_write(self):
        mb = MockBlob('{"a": 13}')
        features = FeatureBlob(mb)
        features['b'] = 'asdf'
        self.assertEqual(json.loads(mb.data), {'a': 13, 'b': 'asdf'})

    def test_write_error(self):
        features = FeatureBlob(MockBlob('{"a": 13}'))

        # keys must be strings, values strings or numbers
        def set_bad_key():
            features[13] = 'asdf'
        self.assertRaises(AssertionError, set_bad_key)

        def set_bad_value():
            features['b'] = ['asdf']
        self.assertRaises(AssertionError, set_bad_value)

        try:
            features['b'] = 'asdf'
            features['c'] = 15
        except ValueError:
            self.fail('Strings and numbers should be allowed')
if __name__ == '__main__':
    setup_logger('ERROR')
    unittest.main()
--------------------------------------------------------------------------------
/spaghettifs/storage.py:
--------------------------------------------------------------------------------
import os
from time import time
import UserDict
import logging
import binascii
from cStringIO import StringIO
from itertools import chain
import weakref
import json
import functools

from easygit import EasyGit
from treetree import TreeTree

log = logging.getLogger('spaghettifs.storage')
log.setLevel(logging.DEBUG)

class FeatureBlob(object):
    """
    Dict-like access to the `features` blob: a small JSON object holding
    filesystem-level flags (storage format markers, inode counter).
    """

    def __init__(self, blob):
        self.blob = blob

    def load(self):
        # parse the JSON payload on every access; the blob is tiny
        return json.loads(self.blob.data)

    def save(self, data):
        self.blob.data = json.dumps(data)

    nothing = object() # marker object

    def get(self, key, default=nothing):
        # NOTE: unlike dict.get, this raises KeyError when the key is
        # missing and no default was supplied (behavior pinned by
        # FeaturesTestCase.test_read).
        try:
            return self.load()[key]
        except KeyError:
            if default is not self.nothing:
                return default
            else:
                raise

    def __getitem__(self, key):
        return self.get(key)

    def __setitem__(self, key, value):
        # only flat string/number values are supported
        assert isinstance(key, basestring)
        assert isinstance(value, (basestring, int))
        data = self.load()
        data[key] = value
        self.save(data)

class GitStorage(object):
    """
    Filesystem storage backed by a bare git repository (via EasyGit).

    Top-level layout: `inodes` (treetree of per-inode trees), `root.ls`
    (listing blob of the root folder), `root.sub` (subfolder trees) and
    `features` (JSON blob of format flags).
    """

    # NOTE(review): author string looks truncated -- an "<email>" part may
    # have been lost; verify against repository history.
    commit_author = "Spaghetti User "

    @classmethod
    def create(cls, repo_path):
        """ Initialize a new, empty filesystem repository at `repo_path`. """
        if not os.path.isdir(repo_path):
            os.mkdir(repo_path)

        eg = EasyGit.new_repo(repo_path, bare=True)

        # created for their side effect on `eg.root`; the local names only
        # document the layout
        inodes = eg.root.new_tree('inodes')
        root_ls = eg.root.new_blob('root.ls')
        root_sub = eg.root.new_tree('root.sub')
        features_blob = eg.root.new_blob('features')

        features_blob.data = '{}'
        features = FeatureBlob(features_blob)
        features['next_inode_number'] = 1
        features['inode_index_format'] = 'treetree'
        features['inode_format'] = 'treetree'

        eg.commit(cls.commit_author, 'Created empty filesystem')

        return cls(repo_path)

    def __init__(self, repo_path, autocommit=True):
        self.eg = EasyGit.open_repo(repo_path)
        # refuse to open repositories in older storage formats; they must
        # be migrated first (see the upgrade scripts at the module bottom)
        features = FeatureBlob(self.eg.root['features'])
        assert features.get('inode_format', None) == 'treetree'
        assert features.get('inode_index_format', None) == 'treetree'
        self.autocommit = autocommit
        log.debug('Loaded storage, autocommit=%r, HEAD=%r',
                  autocommit, self.eg.get_head_id())
        # name -> weakref to StorageInode, so hard links share one object
        self._inode_cache = {}
        self._inodes_tt = TreeTree(self.eg.root['inodes'], prefix='it')

    def get_root(self):
        """ Return the root folder as a StorageDir. """
        commit_tree = self.eg.root
        root_ls = commit_tree['root.ls']
        root_sub = commit_tree['root.sub']
        root = StorageDir('root', root_ls, root_sub, '/', self, None)
        root.path = '/'
        return root

    def get_inode(self, name):
        """
        Return the StorageInode called `name` (e.g. 'i13'), reusing a
        cached live instance when possible.  Raises KeyError if missing.
        """
        if name in self._inode_cache:
            inode = self._inode_cache[name]()
            if inode is None:
                # stale weakref; drop it and fall through to a fresh load
                del self._inode_cache[name]
            else:
                return inode

        inode_tree = self._inodes_tt[name[1:]]
        inode = StorageInode(name, inode_tree, self)
        self._inode_cache[name] = weakref.ref(inode)

        return inode

    def create_inode(self):
        """ Allocate the next inode number and create its tree. """
        features = FeatureBlob(self.eg.root['features'])
        next_inode_number = features['next_inode_number']
        features['next_inode_number'] = next_inode_number + 1

        inode_name = 'i%d' % next_inode_number
        inode_tree = self._inodes_tt.new_tree(inode_name[1:])
        inode_tree.new_blob('meta').data = StorageInode.default_meta

        return self.get_inode(inode_name)

    def _remove_inode(self, name):
        # forget the cache entry; the caller removes the git objects
        if name in self._inode_cache:
            del self._inode_cache[name]

    def _autocommit(self):
        if self.autocommit:
            self.commit("Auto commit")

    def commit(self, message=None, amend=False, head_id=None, branch='master'):
        """
        Record the current state as a git commit on `branch`.  With
        `amend`, replace the head commit, reusing its message when no new
        one is given.
        """
        log.info('Committing')

        if head_id is None:
            head_id = self.eg.get_head_id(branch)

        if amend:
            git = self.eg.git
            prev_commit = git.commit(head_id)
            parents = prev_commit.parents
            if message is None:
                message = prev_commit.message
        else:
            parents = [head_id]

        assert message is not None

        self.eg.commit(self.commit_author, message, parents,
                       branch=branch)
class StorageDir(object, UserDict.DictMixin):
    """
    A folder.  Entries live in `ls_blob` as lines of
    "<quoted-name> <value>", where value '/' marks a subfolder and any
    other value is an inode name.  Subfolder data is kept in `sub_tree`
    under "<quoted-name>.ls" / "<quoted-name>.sub".
    """

    is_dir = True

    def __init__(self, name, ls_blob, sub_tree, path, storage, parent):
        self.name = name
        self.ls_blob = ls_blob # blob that lists our contents
        self.sub_tree = sub_tree # tree that keeps our subfolders
        self.path = path
        self.storage = storage
        self.parent = parent
        log.debug('Loaded folder %r', name)

    def _iter_contents(self):
        return iter_entries(self.ls_blob.data)

    def keys(self):
        # DictMixin builds the rest of the mapping API on top of this
        for name, value in self._iter_contents():
            yield name

    def __getitem__(self, key):
        for name, value in self._iter_contents():
            if key == name:
                break
        else:
            raise KeyError('Folder entry %s not found' % repr(key))

        if value == '/':
            qname = quote(name)
            child_ls = self.sub_tree[qname + '.ls']
            try:
                child_sub = self.sub_tree[qname + '.sub']
            except KeyError:
                # older entries may lack a '.sub' tree; create it lazily
                child_sub = self.sub_tree.new_tree(qname + '.sub')
                self.storage._autocommit()
            return StorageDir(name, child_ls, child_sub,
                              self.path + name + '/',
                              self.storage, self)
        else:
            inode = self.storage.get_inode(value)
            return StorageFile(name, inode, self)

    def create_file(self, name, inode=None):
        """
        Create a file entry.  With `inode`, link the entry to an existing
        inode (incrementing its link count) instead of allocating one.
        """
        check_filename(name)

        if inode is None:
            log.info('Creating file %r in %r', name, self.path)
            inode = self.storage.create_inode()
        else:
            assert(inode.storage is self.storage)
            log.info('Linking file %r in %r to inode %r',
                     name, self.path, inode.name)
            inode['nlink'] += 1

        with self.ls_blob as b:
            b.data += "%s %s\n" % (quote(name), inode.name)

        self.storage._autocommit()

        return self[name]

    def link_file(self, name, src_file):
        """ Make a new file, hard-linked to `src_file` """
        assert not src_file.is_dir
        return self.create_file(name, src_file.inode)

    def create_directory(self, name):
        """ Create an empty subfolder called `name`. """
        check_filename(name)
        log.info('Creating directory %s in %s', repr(name), repr(self.path))

        qname = quote(name)
        with self.sub_tree as st:
            child_ls_blob = st.new_blob(qname + '.ls')
        with self.ls_blob as b:
            b.data += "%s /\n" % qname

        self.storage._autocommit()

        return self[name]

    def remove_ls_entry(self, rm_name):
        """ Drop `rm_name` from the listing; it must occur exactly once. """
        ls_data = ''
        removed_count = 0
        for name, value in self._iter_contents():
            if name == rm_name:
                log.debug('Removing ls entry %s from %s',
                          repr(rm_name), repr(self.path))
                removed_count += 1
            else:
                ls_data += '%s %s\n' % (quote(name), value)
        assert removed_count == 1

        with self.ls_blob as b:
            b.data = ls_data

        self.storage._autocommit()

    def unlink(self):
        """ Remove this folder from its parent. """
        log.info('Removing folder %s', repr(self.path))

        self.ls_blob.remove()
        self.sub_tree.remove()
        self.parent.remove_ls_entry(self.name)

        self.storage._autocommit()

class StorageInode(object):
    """
    File contents plus metadata.  Data is split into fixed-size blocks
    stored in a treetree keyed by block number; metadata lives in the
    'meta' blob as "key: value" lines.
    """

    blocksize = 64*1024 # 64 KB

    default_meta = ('mode: 0100644\n'
                    'nlink: 1\n'
                    'uid: 0\n'
                    'gid: 0\n'
                    'size: 0\n')
    int_meta = ('nlink', 'uid', 'gid', 'size') # stored as decimal
    oct_meta = ('mode',) # stored as octal

    def __init__(self, name, tree, storage):
        self.name = name
        self.tree = tree
        self.storage = storage
        self.tt = TreeTree(tree, prefix='bt')
        log.debug('Loaded inode %r', name)

    def _read_meta(self):
        # fall back to the defaults if the 'meta' blob is missing
        try:
            meta_blob = self.tree['meta']
        except KeyError:
            meta_raw = self.default_meta
        else:
            meta_raw = meta_blob.data

        return dict(line.split(': ', 1)
                    for line in meta_raw.strip().split('\n'))

    def _write_meta(self, meta_data):
        meta_raw = ''.join('%s: %s\n' % (key, value)
                           for key, value in sorted(meta_data.items()))
        # NOTE(review): assumes new_blob overwrites an existing 'meta'
        # blob -- confirm against easygit semantics
        self.tree.new_blob('meta').data = meta_raw
        self.storage._autocommit()

    def __getitem__(self, key):
        value = self._read_meta()[key]

        if key in self.oct_meta:
            value = int(value, base=8)
        elif key in self.int_meta:
            value = int(value)

        return value

    def __setitem__(self, key, value):
        if key in self.oct_meta:
            value = '0%o' % value
        elif key in self.int_meta:
            value = '%d' % value
        else:
            raise NotImplementedError

        meta_data = self._read_meta()
        meta_data[key] = value
        self._write_meta(meta_data)

    def read_block(self, n):
        """ Return the raw data of block `n`, or '' if it is missing. """
        block_name = str(n)
        log.debug('Reading block %r of inode %r', block_name, self.name)
        try:
            block = self.tt[block_name]
        except KeyError:
            return ''
        else:
            return block.data

    def write_block(self, n, data):
        """ Replace the contents of block `n`, creating it if needed. """
        block_name = str(n)
        log.debug('Writing block %r of inode %r', block_name, self.name)
        try:
            block = self.tt[block_name]
        except KeyError:
            block = self.tt.new_blob(block_name)
        block.data = data

        self.storage._autocommit()

    def delete_block(self, n):
        block_name = str(n)
        log.debug('Removing block %r of inode %r', block_name, self.name)
        del self.tt[block_name]

        self.storage._autocommit()

    def read_data(self, offset, length):
        """
        Return up to `length` bytes starting at `offset`; reads past EOF
        are clipped (possibly to the empty string).
        """
        end = offset + length
        eof = self['size']
        if end > eof:
            end = eof
            length = end - offset
        if length <= 0:
            return ''
        # explicit floor division keeps block indices integral even under
        # `from __future__ import division` (same result on Python 2)
        first_block = offset // self.blocksize
        last_block = end // self.blocksize

        output = StringIO()
        for n_block in range(first_block, last_block+1):
            block_offset = n_block * self.blocksize

            fragment_offset = 0
            if n_block == first_block:
                fragment_offset = offset - block_offset

            fragment_end = self.blocksize
            if n_block == last_block:
                # may be an empty fragment when `end` is on a boundary
                fragment_end = end - block_offset

            block_data = self.read_block(n_block)
            fragment = block_data[fragment_offset:fragment_end]
            assert len(fragment) == fragment_end - fragment_offset
            output.write(fragment)

        output = output.getvalue()
        assert len(output) == length
        return output

    def write_data(self, data, offset):
        """
        Write `data` at `offset`, zero-padding (via truncate) when the
        offset lies beyond the current end of file.
        """
        current_size = self['size']
        if current_size < offset:
            self.truncate(offset)

        log.info('Inode %s writing %d bytes at offset %d',
                 repr(self.name), len(data), offset)

        end = offset + len(data)
        first_block = offset // self.blocksize
        last_block = end // self.blocksize

        for n_block in range(first_block, last_block+1):
            block_offset = n_block * self.blocksize

            insert_offset = 0
            if n_block == first_block:
                insert_offset = offset - block_offset

            insert_end = self.blocksize
            if n_block == last_block:
                insert_end = end - block_offset

            data_start = block_offset + insert_offset - offset
            data_end = block_offset + insert_end - offset

            # BUGFIX: message used to say "Updating inode %d" while being
            # handed the block number
            log.debug('Updating block %d between (%d, %d) '
                      'with data slice between (%d, %d)',
                      n_block, insert_offset, insert_end,
                      data_start, data_end)

            current_data = self.read_block(n_block)
            datafile = StringIO()
            datafile.write(current_data)
            datafile.seek(insert_offset)
            datafile.write(data[data_start:data_end])
            self.write_block(n_block, datafile.getvalue())

        if end > current_size:
            self['size'] = end

    def truncate(self, new_size):
        """ Grow (zero-filled) or shrink the file to `new_size` bytes. """
        log.info("Truncating inode %s, new size %d", repr(self.name), new_size)

        current_size = self['size']
        if current_size < new_size:
            # TODO: avoid creating one big string
            self.write_data('\0' * (new_size - current_size), current_size)

        elif current_size > new_size:
            first_block = new_size // self.blocksize
            last_block = current_size // self.blocksize
            truncate_offset = new_size % self.blocksize

            for n_block in range(first_block, last_block+1):
                if n_block == first_block and truncate_offset > 0:
                    # partial block: keep only the leading bytes
                    old_data = self.read_block(n_block)
                    self.write_block(n_block, old_data[:truncate_offset])
                else:
                    self.delete_block(n_block)

            self['size'] = new_size

    def unlink(self):
        """ Drop one link; remove the inode when the count reaches 0. """
        log.info('Unlinking inode %r', self.name)

        nlink = self['nlink'] - 1
        if nlink > 0:
            log.info('Links remaining for inode %r: %d', self.name, nlink)
            self['nlink'] = nlink
        else:
            log.info('Links remaining for inode %r: 0; removing.', self.name)
            self.storage._remove_inode(self.name)
            self.tree.remove()

        self.storage._autocommit()
class StorageFile(object):
    """ A directory entry pointing at an inode; delegates I/O to it. """

    is_dir = False

    def __init__(self, name, inode, parent):
        self.name = name
        self.inode = inode
        self.parent = parent

    @property
    def path(self):
        return self.parent.path + self.name

    @property
    def size(self):
        return self.inode['size']

    def _read_all_data(self):
        return self.read_data(0, self.size)

    def read_data(self, offset, length):
        return self.inode.read_data(offset, length)

    def write_data(self, data, offset):
        return self.inode.write_data(data, offset)

    def truncate(self, new_size):
        return self.inode.truncate(new_size)

    def unlink(self):
        """ Remove this name; the inode dies with its last link. """
        log.info('Unlinking file %s', repr(self.path))
        self.parent.remove_ls_entry(self.name)
        self.inode.unlink()

def quote(name):
    """ Quoted-printable encode `name` so it is safe in ls listings. """
    # drop soft line breaks so the result stays on a single line
    return (binascii.b2a_qp(name, quotetabs=True, istext=False)
            .replace('=\n', ''))

unquote = binascii.a2b_qp

def check_filename(name):
    """ Raise ValueError for '.', '..', '', '/'-containing or overlong names. """
    if name in ('.', '..', '') or '/' in name or len(name) > 255:
        raise ValueError("Bad filename %r" % name)

def iter_entries(ls_data):
    """ Yield (name, value) pairs from a folder listing blob. """
    for line in ls_data.split('\n'):
        if not line:
            continue
        name, value = line.rsplit(' ', 1)
        yield unquote(name), value

upgrade_log = logging.getLogger('spaghettifs.storage.upgrade')
upgrade_log.setLevel(logging.DEBUG)

def storage_format_upgrade(upgrade_name, upgrade_from, upgrade_to):
    """
    Decorator for storage migration scripts.  The wrapped function is run
    against a repository path only when every feature in `upgrade_from`
    matches; afterwards the features in `upgrade_to` are recorded and the
    result is committed.
    """
    def decorator(the_upgrade):
        @functools.wraps(the_upgrade)
        def wrapper(repo_path):
            eg = EasyGit.open_repo(repo_path)

            if 'features' not in eg.root:
                upgrade_log.info('Creating "features" blob for repository %r',
                                 repo_path)
                b = eg.root.new_blob('features')
                b.data = '{}'
            features = FeatureBlob(eg.root['features'])

            # NOTE(review): `is not` only works because the precondition
            # values used so far are None sentinels; use `!=` if
            # string-valued preconditions are ever needed
            for name, value in upgrade_from.iteritems():
                if features.get(name, None) is not value:
                    upgrade_log.debug('Skipping upgrade %r on repository %r, '
                                      'feature %r is not %r',
                                      upgrade_name, repo_path, name, value)
                    return

            upgrade_log.info('Starting upgrade %r on repository %r',
                             upgrade_name, repo_path)
            the_upgrade(eg)

            upgrade_log.debug('Writing features for upgrade %r', upgrade_name)
            for name, value in upgrade_to.iteritems():
                features[name] = value

            message = "Update script %r" % upgrade_name
            eg.commit(GitStorage.commit_author, message,
                      [eg.get_head_id('master')])
            upgrade_log.info('Finished upgrade %r on repository %r',
                             upgrade_name, repo_path)

        return wrapper

    return decorator

@storage_format_upgrade('Convert inode blocks list to treetree',
                        upgrade_from={'inode_format': None},
                        upgrade_to={'inode_format': 'treetree'})
def convert_fs_to_treetree_inodes(eg):
    """
    Convert an existing filesystem from the "inode with flat list of blocks"
    format to the "inode with treetree of blocks" format.
    """

    inode_index = eg.root['inodes']

    class DummyStorage(object):
        # swallow autocommits; the upgrade commits once at the end
        def _autocommit(self): pass
    s = DummyStorage()

    for inode_name in inode_index:
        upgrade_log.debug('Reorganizing inode %r', inode_name)
        inode = StorageInode(inode_name, inode_index[inode_name], s)

        # old-format blocks are named 'b<byte offset>'; 'bt*' entries
        # already belong to a treetree and are skipped
        block_offsets = set()
        for old_block_name in inode.tree:
            if old_block_name.startswith('b'):
                if not old_block_name.startswith('bt'):
                    block_offsets.add(int(old_block_name[1:]))

        size = 0
        for block_offset in sorted(block_offsets):
            old_block_name = 'b%d' % block_offset
            new_block_name = str(block_offset // StorageInode.blocksize)
            old_block = inode.tree[old_block_name]
            new_block = inode.tt.clone(old_block, new_block_name)
            del inode.tree[old_block_name]
            size = block_offset + len(new_block.data)

        # BUGFIX: size was previously computed from the loop variables
        # after the loop, raising NameError for inodes with no blocks
        inode['size'] = size
        inode.tree._commit()

@storage_format_upgrade('Convert list of inodes to treetree',
                        upgrade_from={'inode_index_format': None},
                        upgrade_to={'inode_index_format': 'treetree'})
def convert_fs_to_treetree_inode_index(eg):
    """
    Convert a filesystem from the "inode index as a flat list" format to the
    "inode index as treetree" format.
    """

    inode_index_raw = eg.root['inodes']
    inode_index_tt = TreeTree(inode_index_raw, prefix='it')

    all_inode_names = list(inode_index_raw.keys())
    largest_number = -1
    for inode_name in all_inode_names:
        upgrade_log.debug('Moving inode %r to treetree', inode_name)
        inode_index_tt.clone(inode_index_raw[inode_name], inode_name[1:])
        del inode_index_raw[inode_name]
        number = int(inode_name[1:])
        largest_number = max(largest_number, number)

    # resume inode numbering after the highest existing inode
    FeatureBlob(eg.root['features'])['next_inode_number'] = largest_number + 1

all_updates = [
    convert_fs_to_treetree_inodes,
    convert_fs_to_treetree_inode_index,
]