├── .gitignore ├── .pylintrc ├── .travis.yml ├── LICENSE ├── Makefile ├── README.md ├── pcachefs ├── __init__.py ├── pcachefs.py ├── pcachefsutil.py ├── ranges.py └── vfs.py ├── requirements.txt ├── setup.py └── test ├── __init__.py └── test_all.py /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | .*.swp 3 | *~ 4 | build 5 | 6 | dist/ 7 | *.egg-info/ 8 | 9 | # test folder 10 | test/testroot 11 | 12 | # virtual env directory 13 | *.venv* 14 | -------------------------------------------------------------------------------- /.pylintrc: -------------------------------------------------------------------------------- 1 | [MASTER] 2 | 3 | ignore=.git 4 | persistent=no 5 | 6 | 7 | [MESSAGES CONTROL] 8 | 9 | disable=invalid-name, 10 | bare-except, 11 | broad-except, 12 | missing-docstring, 13 | redefined-builtin, 14 | line-too-long, 15 | too-many-arguments, 16 | relative-import, 17 | protected-access, 18 | unidiomatic-typecheck, 19 | too-few-public-methods, 20 | too-many-instance-attributes, 21 | too-many-locals, 22 | too-many-branches, 23 | too-many-statements, 24 | too-many-lines, 25 | bad-builtin, 26 | bad-whitespace, 27 | redefined-variable-type, 28 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | dist: trusty 2 | 3 | language: python 4 | python: 5 | - "2.7" 6 | 7 | install: 8 | - sudo apt-get -qq update 9 | - sudo apt-get install -y libfuse-dev 10 | - pip install -e .[test] 11 | 12 | script: 13 | - make lint 14 | - make test 15 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 
9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. 
For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. 
Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. 
You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 
123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. 
In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS 178 | 179 | APPENDIX: How to apply the Apache License to your work. 180 | 181 | To apply the Apache License to your work, attach the following 182 | boilerplate notice, with the fields enclosed by brackets "[]" 183 | replaced with your own identifying information. (Don't include 184 | the brackets!) The text should be enclosed in the appropriate 185 | comment syntax for the file format. 
We also recommend that a 186 | file or class name and description of purpose be included on the 187 | same "printed page" as the copyright notice for easier 188 | identification within third-party archives. 189 | 190 | Copyright [yyyy] [name of copyright owner] 191 | 192 | Licensed under the Apache License, Version 2.0 (the "License"); 193 | you may not use this file except in compliance with the License. 194 | You may obtain a copy of the License at 195 | 196 | http://www.apache.org/licenses/LICENSE-2.0 197 | 198 | Unless required by applicable law or agreed to in writing, software 199 | distributed under the License is distributed on an "AS IS" BASIS, 200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 201 | See the License for the specific language governing permissions and 202 | limitations under the License. 203 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | install-archlinux: ## Install needed packages with pacman 2 | hash python2 2>/dev/null || sudo pacman -S python2 3 | hash virtualenv 2>/dev/null || sudo pip install -U virtualenv 4 | hash pip2 2>/dev/null || sudo pacman -S python-pip 5 | [ -f /usr/include/fuse/fuse.h ] || sudo pacman -S fuse2 6 | 7 | 8 | venv2.7: .venv2.7/bin/activate ## Setup virtualenv with python2.7 9 | 10 | .venv2.7/bin/activate: requirements.txt setup.py 11 | test -d .venv2.7 || virtualenv --python=python2.7 .venv2.7 12 | .venv2.7/bin/pip install -e .[dev,test] 13 | touch .venv2.7/bin/activate 14 | 15 | 16 | test: test2.7 ## Run tests for all supported python versions 17 | 18 | test2.7: clean venv2.7 ## Run tests with python2.7 19 | .venv2.7/bin/python -mpytest test/test_all.py 20 | 21 | 22 | lint: venv2.7 ## Run linter 23 | .venv2.7/bin/pylint --disable=fixme pcachefs test 24 | 25 | 26 | fixme: venv2.7 ## List fixme 27 | .venv2.7/bin/pylint --disable=all --enable=fixme pcachefs 
test 28 | 29 | 30 | clean: ## Remove temporary files 31 | find . -name '*.pyc' -delete 32 | rm -rf build dist *.egg-info test/testroot 33 | 34 | 35 | help: ## This help 36 | @echo 'Available targets:' 37 | @grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) \ 38 | | sort \ 39 | | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-30s\033[0m %s\n", $$1, $$2}' 40 | 41 | 42 | .PHONY: test 43 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Introduction [![build status][build-img]][build-url] 2 | ============ 3 | pCacheFS provides a simple caching layer for other filesystems. This 4 | makes slow, remote filesystems seem very fast to access. Moreover, the 5 | cache does not disappear when you start or stop pCacheFS or if you 6 | reboot your computer - it is **persistent**. 7 | 8 | It is designed for caching large amounts of data on remote filesystems 9 | that don't change very much, such as movie or music libraries. 10 | 11 | Disclaimer 12 | ========== 13 | The code originates from http://code.google.com/p/pcachefs/. The 14 | original copyright notice is: 15 | ``` 16 | Copyright 2012 Jonny Tyers 17 | pCacheFS is license under Apache License 2.0 - see the LICENSE file for details. 18 | ``` 19 | 20 | Key features 21 | ============ 22 | * You can choose where to store your persistent cache - local harddisk, ramdisk filesystem, etc. 23 | * Cache contents of any other filesystem, whether local or remote (even other FUSE filesystems such as [sshfs](http://fuse.sourceforge.net/sshfs.html)). 24 | * pCacheFS caches data as it is read, and only the bits that are read. 25 | 26 | Currently, pCacheFS mounts are **read-only** - writes are not (yet) 27 | supported. 28 | 29 | Example 30 | ======= 31 | Suppose I have a slow network filesystem mounted at `/remote`. 
32 | 33 | ```sh 34 | $ ls /remote 35 | hugefile1 hugefile2 dir3 36 | ``` 37 | 38 | If I want to use another local directory as a persistent cache for this 39 | filesystem, I can use a pCacheFS mount: 40 | 41 | ```sh 42 | $ pcachefs.py -c /cache -t /remote /remote-cached 43 | ``` 44 | 45 | I will now have a mirror of `/remote` at `/remote-cached`. 46 | 47 | ```sh 48 | $ ls /remote-cached 49 | hugefile1 hugefile2 dir3 50 | ``` 51 | 52 | This is our caching filesystem. We can read files from this filesystem 53 | and their contents will be cached in files in `/cache`. (As well as file 54 | contents, metadata and directory listings are also cached.) 55 | 56 | So, the first time I access hugefile1 it will be as slow as it would 57 | have been via `/remote`: 58 | 59 | ```sh 60 | $ cat /remote-cached/hugefile1 61 | ``` 62 | 63 | But, access `hugefile1` again and you'll notice a big speed improvement. 64 | This is because the data isn't actually being read from the slow 65 | filesystem at `/remote`, it is being read from `/cache`. 66 | 67 | Note that in order to get the benefit of the cache you must access files 68 | via your pCacheFS mountpoint (`/remote-cached` above, but this can be 69 | anything you like). Accessing the target filesystem directly (via 70 | `/remote` above) will not see any speed gains as you are bypassing 71 | pCacheFS. 72 | 73 | Install 74 | ======= 75 | pCacheFS requires FUSE and the FUSE Python bindings to be installed on 76 | your system. 77 | 78 | Ubuntu users should be able to use this command to install: 79 | ``` 80 | $ sudo apt-get install fuse python-fuse 81 | ``` 82 | 83 | Then you can use pip and virtualenv to install dependencies. 
84 | ``` 85 | $ virtualenv .venv2.7 -p python2.7 86 | $ source .venv2.7/bin/activate 87 | $ pip install -e '.[dev,test]' 88 | ``` 89 | 90 | [build-img]: https://travis-ci.org/ibizaman/pcachefs.svg?branch=master 91 | [build-url]: https://travis-ci.org/ibizaman/pcachefs 92 | -------------------------------------------------------------------------------- /pcachefs/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | pcachefs package. 3 | """ 4 | 5 | from pcachefs import main 6 | 7 | from pcachefs import FuseStat 8 | from pcachefs import PersistentCacheFs 9 | from pcachefs import Cacher 10 | from pcachefs import UnderlyingFs 11 | -------------------------------------------------------------------------------- /pcachefs/pcachefs.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | """ 4 | Persistent caching FUSE filesystem 5 | 6 | Copyright 2012 Jonny Tyers 7 | 8 | Licensed under the Apache License, Version 2.0 (the "License"); 9 | you may not use this file except in compliance with the License. 10 | You may obtain a copy of the License at 11 | 12 | http://www.apache.org/licenses/LICENSE-2.0 13 | 14 | Unless required by applicable law or agreed to in writing, software 15 | distributed under the License is distributed on an "AS IS" BASIS, 16 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | See the License for the specific language governing permissions and 18 | limitations under the License. 
19 | 20 | """ 21 | 22 | import os 23 | import pickle 24 | import signal 25 | import stat 26 | # We explicitly refer to __builtin__ here so it can be mocked 27 | import __builtin__ 28 | 29 | from pprint import pformat 30 | 31 | import fuse 32 | 33 | import vfs 34 | from ranges import (Ranges, Range) 35 | from pcachefsutil import debug, is_read_only_flags 36 | from pcachefsutil import E_PERM_DENIED, E_NOT_IMPL 37 | 38 | 39 | fuse.fuse_python_api = (0, 2) 40 | 41 | 42 | class FuseStat(fuse.Stat): 43 | """Convenient class for Stat objects. 44 | 45 | Set up the stat object based on values from the given stat object 46 | (which should come from os.stat()). 47 | """ 48 | def __init__(self, st): 49 | fuse.Stat.__init__(self) 50 | 51 | self.st_mode = st.st_mode 52 | self.st_nlink = st.st_nlink 53 | self.st_size = st.st_size 54 | self.st_atime = st.st_atime 55 | self.st_mtime = st.st_mtime 56 | self.st_ctime = st.st_ctime 57 | 58 | self.st_dev = st.st_dev 59 | self.st_gid = st.st_gid 60 | self.st_ino = st.st_ino 61 | self.st_uid = st.st_uid 62 | 63 | self.st_rdev = st.st_rdev 64 | self.st_blksize = st.st_blksize 65 | 66 | def __repr__(self): 67 | v = vars(self) 68 | v['is_dir'] = stat.S_ISDIR(v['st_mode']) 69 | v['is_char_dev'] = stat.S_ISCHR(v['st_mode']) 70 | v['is_block_dev'] = stat.S_ISBLK(v['st_mode']) 71 | v['is_file'] = stat.S_ISREG(v['st_mode']) 72 | v['is_fifo'] = stat.S_ISFIFO(v['st_mode']) 73 | v['is_symlk'] = stat.S_ISLNK(v['st_mode']) 74 | v['is_sock'] = stat.S_ISSOCK(v['st_mode']) 75 | return pformat(v) 76 | 77 | 78 | class PersistentCacheFs(fuse.Fuse): 79 | """Main FUSE class 80 | 81 | This just delegates operations to a Cacher instance. 
82 | """ 83 | def __init__(self, *args, **kw): 84 | fuse.Fuse.__init__(self, *args, **kw) 85 | 86 | # Currently we have to run in single-threaded mode to prevent 87 | # the cache becoming corrupted 88 | self.parse(['-s']) 89 | 90 | self.parser.add_option('-c', '--cache-dir', dest='cache_dir', help="Specifies the directory where cached data should be stored. This will be created if it does not exist.") 91 | self.parser.add_option('-t', '--target-dir', dest='target_dir', help="The directory which we are caching. The content of this directory will be mirrored and all reads cached.") 92 | self.parser.add_option('-v', '--virtual-dir', dest='virtual_dir', help="The folder in the mount dir in which the virtual filesystem controlling pcachefs will reside.") 93 | 94 | self.cache_dir = None 95 | self.target_dir = None 96 | self.virtual_dir = None 97 | self.cacher = None 98 | self.vfs = None 99 | 100 | def main(self, args=None): 101 | options = self.cmdline[0] 102 | 103 | if options.cache_dir is None: 104 | self.parser.error('Need to specify --cache-dir') 105 | if options.target_dir is None: 106 | self.parser.error('Need to specify --target-dir') 107 | 108 | self.cache_dir = options.cache_dir 109 | self.target_dir = options.target_dir 110 | self.virtual_dir = options.virtual_dir or '.pcachefs' 111 | 112 | self.cacher = Cacher(self.cache_dir, UnderlyingFs(self.target_dir)) 113 | self.vfs = vfs.VirtualFS(self.virtual_dir, self.cacher) 114 | 115 | signal.signal(signal.SIGINT, signal.SIG_DFL) 116 | fuse.Fuse.main(self, args) 117 | 118 | def getattr(self, path): 119 | debug('PersistentCacheFs.getattr', path) 120 | if self.vfs.contains(path): 121 | return self.vfs.getattr(path) 122 | 123 | return self.cacher.getattr(path) 124 | 125 | def readdir(self, path, offset): 126 | debug('PersistentCacheFs.readdir', path, offset) 127 | for f in self.vfs.readdir(path, offset): 128 | if f is None: 129 | return 130 | yield f 131 | 132 | for f in self.cacher.readdir(path, offset): 133 | yield f 
134 | 135 | def open(self, path, flags): 136 | debug('PersistentCacheFs.open', path, flags) 137 | if self.vfs.contains(path): 138 | return self.vfs.open(path, flags) 139 | 140 | if not is_read_only_flags(flags): 141 | return E_PERM_DENIED 142 | 143 | return 0 144 | 145 | def read(self, path, size, offset): 146 | debug('PersistentCacheFs.read', path, size, offset) 147 | if self.vfs.contains(path): 148 | return self.vfs.read(path, size, offset) 149 | 150 | return self.cacher.read(path, size, offset) 151 | 152 | def truncate(self, path, size): 153 | debug('PersistentCacheFs.truncate', path, size) 154 | if self.vfs.contains(path): 155 | return self.vfs.truncate(path, size) 156 | 157 | return E_NOT_IMPL 158 | 159 | def write(self, path, buf, offset): 160 | debug('PersistentCacheFs.write', path, buf, offset) 161 | if self.vfs.contains(path): 162 | return self.vfs.write(path, buf, offset) 163 | 164 | return E_NOT_IMPL 165 | 166 | def flush(self, path): 167 | debug('PersistentCacheFs.flush', path) 168 | if self.vfs.contains(path): 169 | return self.vfs.flush(path) 170 | 171 | return 0 # success 172 | 173 | def release(self, path, what): 174 | debug('PersistentCacheFs.release', path, what) 175 | if self.vfs.contains(path): 176 | return self.vfs.release(path) 177 | 178 | return 0 # success 179 | 180 | # def _getattr_special(self, path): 181 | # return FuseStat(os.stat('/proc/version')) # FIXME stat of the FUSE mountpoint 182 | # 183 | # def _read_special(self, path, size, offset): 184 | # debug("_read_special", path, size, offset) 185 | # content = None 186 | # 187 | # if path == self.CACHE_ONLY_MODE_PATH: 188 | # debug("_read_special com", path, size, offset) 189 | # if self.cacher.cache_only_mode == True: 190 | # debug(" return 1") 191 | # return '111111111111111111111111111\n'[offset:offset+size] 192 | # else: 193 | # debug(" return 0") 194 | # return '000000000000000000000000000\n'[offset:offset+size] 195 | # 196 | # else: 197 | # debug(" return NSF") 198 | # return 
E_NO_SUCH_FILE 199 | # 200 | # def _write_special(self, path, buf, offset): 201 | # content = buf.strip() 202 | # debug("_write_special", path, buf, offset) 203 | # 204 | # if path == self.CACHE_ONLY_MODE_PATH: 205 | # if content == '0': 206 | # self.cacher.cache_only_mode = False 207 | # return len(buf) # wrote one byte 208 | # 209 | # elif content == '1': 210 | # self.cacher.cache_only_mode = True 211 | # return len(buf) # wrote one byte 212 | # 213 | # else: 214 | # return self.E_INVAL 215 | # 216 | # else: 217 | # return E_NO_SUCH_FILE 218 | 219 | class UnderlyingFs(object): 220 | """Implementation of FUSE operations that fetches data from the underlying FS.""" 221 | def __init__(self, real_path): 222 | self.real_path = real_path 223 | 224 | def _get_real_path(self, path): 225 | if path[0] != '/': 226 | raise ValueError("Expected leading slash") 227 | 228 | return os.path.join(self.real_path, path[1:]) 229 | 230 | def getattr(self, path): 231 | debug('UnderlyingFs.getattr', path) 232 | return FuseStat(os.stat(self._get_real_path(path))) 233 | 234 | def readdir(self, path, offset): 235 | debug('UnderlyingFs.readdir', path, offset) 236 | real_path = self._get_real_path(path) 237 | 238 | dirents = [] 239 | if os.path.isdir(real_path): 240 | dirents.extend([ '.', '..' ]) 241 | 242 | dirents.extend(os.listdir(real_path)) 243 | 244 | # return a generator over the entries in the directory 245 | return (fuse.Direntry(r) for r in dirents) 246 | 247 | def read(self, path, size, offset): 248 | debug('UnderlyingFs.read', path, size, offset) 249 | real_path = self._get_real_path(path) 250 | 251 | with __builtin__.open(real_path, 'rb') as f: 252 | f.seek(offset) 253 | result = f.read(size) 254 | 255 | return result 256 | 257 | 258 | class Cacher(object): 259 | """ 260 | Represents a cache, which caches entire files and their content. 261 | This class mimics the interface of a python Fuse object. 262 | 263 | The cache is a standard filesystem directory. 
264 | 265 | Initially the implementation will copy *entire* files (incl 266 | metadata) down into the cache when they are read. 267 | 268 | The cached files are stored as follows in the cache directory: 269 | /cache/dir/filename.ext/cache.data # copy of file data 270 | /cache/dir/filename.ext/cache.stat # pickle'd stat object (from os.stat()) 271 | /cache/dir/cache.list # pickle'd directory listing (from os.listdir()) 272 | 273 | For writes to files in the cache, these are passed through to the 274 | underlying filesystem without any caching. 275 | """ 276 | 277 | def __init__(self, cachedir, underlying_fs): 278 | """ 279 | Initialise a new Cacher. 280 | 281 | cachedir the directory in which to store cached files and 282 | metadata (this will created automatically if it does not exist) 283 | underlying_fs an object supporting the read(), readdir() and 284 | getattr() FUSE operations. For any files/dirs not in the cache, 285 | this object's methods will be called to retrieve the real data 286 | and populate the cache. 
287 | """ 288 | self.cachedir = cachedir 289 | self.underlying_fs = underlying_fs 290 | 291 | # If this is set to True, the cacher will fail if any 292 | # requests are made for data that does not exist in the cache 293 | self.cache_only_mode = False 294 | 295 | 296 | if not os.path.exists(self.cachedir): 297 | self._mkdir(self.cachedir) 298 | 299 | def cache_only_mode_enable(self): 300 | debug('Cacher.cache_only_mode_enable') 301 | self.cache_only_mode = True 302 | 303 | def cache_only_mode_disable(self): 304 | debug('Cacher.cache_only_mode_disable') 305 | self.cache_only_mode = False 306 | 307 | def get_cached_blocks(self, path): 308 | data_cache_range = self._get_cache_dir(path, 'cache.data.range') 309 | 310 | cached_blocks = None 311 | if os.path.exists(data_cache_range): 312 | with __builtin__.open(data_cache_range, 'rb') as f: 313 | cached_blocks = pickle.load(f) 314 | else: 315 | cached_blocks = Ranges() 316 | 317 | return cached_blocks 318 | 319 | def update_cached_blocks(self, path, cached_blocks): 320 | data_cache_range = self._get_cache_dir(path, 'cache.data.range') 321 | 322 | with __builtin__.open(data_cache_range, 'wb') as f: 323 | pickle.dump(cached_blocks, f) 324 | 325 | def remove_cached_blocks(self, path): 326 | data_cache_range = self._get_cache_dir(path, 'cache.data.range') 327 | 328 | os.remove(data_cache_range) 329 | 330 | def get_cached_data(self, path, size, offset): 331 | cache_data = self._get_cache_dir(path, 'cache.data') 332 | 333 | result = None 334 | with __builtin__.open(cache_data, 'rb') as f: 335 | f.seek(offset) 336 | result = f.read(size) 337 | 338 | return result 339 | 340 | def init_cached_data(self, path): 341 | cache_data = self._get_cache_dir(path, 'cache.data') 342 | 343 | if os.path.exists(cache_data): 344 | return 345 | 346 | file_stat = self.getattr(path) 347 | self._create_cache_dir(path) 348 | 349 | with __builtin__.open(cache_data, 'wb') as f: 350 | f.truncate() 351 | f.seek(file_stat.st_size - 1) 352 | f.write('\0') 
353 | 354 | def update_cached_data(self, path, blocks_to_read): 355 | if not blocks_to_read: 356 | return 357 | 358 | cache_data = self._get_cache_dir(path, 'cache.data') 359 | 360 | # Now open it up in update mode so we can add data to it as 361 | # we read the data from the underlying filesystem 362 | with __builtin__.open(cache_data, 'r+b') as cache_data_file: 363 | 364 | # Now loop through all the blocks we need to get 365 | # and append them to the cached file as we go 366 | for block in blocks_to_read: 367 | block_data = self.underlying_fs.read(path, block.size, block.start) 368 | 369 | cache_data_file.seek(block.start) 370 | cache_data_file.write(block_data) # overwrites existing data in the file 371 | 372 | def remove_cached_data(self, path): 373 | data_cache = self._get_cache_dir(path, 'cache.data') 374 | os.remove(data_cache) 375 | 376 | data_cache_range = self._get_cache_dir(path, 'cache.data.range') 377 | os.remove(data_cache_range) 378 | 379 | def read(self, path, size, offset, force_reload=False): 380 | """Read the given data from the given path on the filesystem. 
381 | 382 | Any parts which are requested and are not in the cache are read 383 | from the underlying filesystem 384 | """ 385 | debug('Cacher.read', path, size, offset) 386 | 387 | self.init_cached_data(path) 388 | 389 | if force_reload: 390 | self.remove_cached_blocks(path) 391 | 392 | cached_blocks = self.get_cached_blocks(path) 393 | blocks_to_read = cached_blocks.get_uncovered_portions(Range(offset, offset+size)) 394 | 395 | self.update_cached_data(path, blocks_to_read) 396 | self.update_cached_blocks(path, cached_blocks.add_ranges(blocks_to_read)) 397 | 398 | return self.get_cached_data(path, size, offset) 399 | 400 | 401 | def readdir(self, path, offset): 402 | """List the given directory, from the cache.""" 403 | debug('Cacher.readdir', path, offset) 404 | cache_dir = self._get_cache_dir(path, 'cache.list') 405 | 406 | result = None 407 | if os.path.exists(cache_dir): 408 | with __builtin__.open(cache_dir, 'rb') as list_cache_file: 409 | result = pickle.load(list_cache_file) 410 | 411 | else: 412 | result_generator = self.underlying_fs.readdir(path, offset) 413 | result = list(result_generator) 414 | 415 | self._create_cache_dir(path) 416 | with __builtin__.open(cache_dir, 'wb') as list_cache_file: 417 | pickle.dump(result, list_cache_file) 418 | 419 | # Return a new generator over our list of items 420 | return (x for x in result) 421 | 422 | def getattr(self, path): 423 | """Retrieve stat information for a particular file from the cache.""" 424 | debug('Cacher.getattr', path) 425 | cache_dir = self._get_cache_dir(path, 'cache.stat') 426 | 427 | result = None 428 | if os.path.exists(cache_dir): 429 | with __builtin__.open(cache_dir, 'rb') as stat_cache_file: 430 | result = pickle.load(stat_cache_file) 431 | 432 | else: 433 | result = self.underlying_fs.getattr(path) 434 | 435 | self._create_cache_dir(path) 436 | with __builtin__.open(cache_dir, 'wb') as stat_cache_file: 437 | pickle.dump(result, stat_cache_file) 438 | 439 | return result 440 | 441 | def 
write(self, path, buf, offset): # pylint: disable=no-self-use 442 | debug('Cacher.write', path, buf, offset) 443 | return E_NOT_IMPL 444 | 445 | def _get_cache_dir(self, path, file = None): 446 | """For a given path, return the name of the directory used to cache data for that path.""" 447 | if path[0] != '/': 448 | raise ValueError("Expected leading slash") 449 | 450 | if file is None: 451 | return os.path.join(self.cachedir, path[1:]) 452 | 453 | return os.path.join(self.cachedir, path[1:], file) 454 | 455 | def _create_cache_dir(self, path): 456 | """Create the cache path for the given directory if it does not already exist.""" 457 | cache_dir = self._get_cache_dir(path) 458 | self._mkdir(cache_dir) 459 | 460 | def _mkdir(self, path): # pylint: disable=no-self-use 461 | """Create the given directory if it does not already exist.""" 462 | if not os.path.exists(path): 463 | os.makedirs(path) 464 | 465 | 466 | def main(args=None): 467 | usage=""" 468 | pCacheFS: A persistently caching filesystem. 469 | """ + fuse.Fuse.fusage 470 | 471 | version = "%prog " + fuse.__version__ 472 | 473 | server = PersistentCacheFs(version=version, usage=usage, dash_s_do='setsingle') 474 | 475 | parsed_args = server.parse(args, errex=1) 476 | if not parsed_args.getmod('showhelp'): 477 | server.main() 478 | 479 | if __name__ == '__main__': 480 | main() 481 | -------------------------------------------------------------------------------- /pcachefs/pcachefsutil.py: -------------------------------------------------------------------------------- 1 | """ 2 | Utility methods used across pcachefs. 
3 | """ 4 | import errno 5 | import os 6 | import sys 7 | 8 | DEBUG = True 9 | def debug(*words): 10 | if DEBUG: 11 | sys.stderr.write('DEBUG: %s\n' % ' '.join(str(word) for word in words)) 12 | 13 | # Error codes 14 | # source: /usr/lib/syslinux/com32/include/errno.h 15 | E_NO_SUCH_FILE = -errno.ENOENT 16 | E_NOT_PERMITTED = -errno.EPERM 17 | E_IO_ERROR = -errno.EIO 18 | E_PERM_DENIED = -errno.EACCES 19 | E_READ_ONLY = -errno.EROFS 20 | E_NOT_IMPL= -errno.ENOSYS 21 | E_INVALID_ARG = -errno.EINVAL 22 | 23 | 24 | def is_read_only_flags(flags): 25 | access_flags = os.O_RDONLY | os.O_WRONLY | os.O_RDWR 26 | return flags & access_flags == os.O_RDONLY 27 | -------------------------------------------------------------------------------- /pcachefs/ranges.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | """ 4 | Range and Ranges classes used by pCacheFS 5 | 6 | Copyright 2012 Jonny Tyers 7 | 8 | Licensed under the Apache License, Version 2.0 (the "License"); 9 | you may not use this file except in compliance with the License. 10 | You may obtain a copy of the License at 11 | 12 | http://www.apache.org/licenses/LICENSE-2.0 13 | 14 | Unless required by applicable law or agreed to in writing, software 15 | distributed under the License is distributed on an "AS IS" BASIS, 16 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | See the License for the specific language governing permissions and 18 | limitations under the License. 19 | 20 | """ 21 | 22 | class Range(object): 23 | """Represents a range of integers (i.e. a start and an end).""" 24 | def __init__(self, start, end): 25 | if start >= end: 26 | raise ValueError('start (' + str(start) + ') must be smaller than end (' + str(end) + ')') 27 | 28 | self.start = start 29 | self.end = end 30 | 31 | self.size = end - start 32 | 33 | def __repr__(self): 34 | return 'Range ' + str(self.start )+ '..' 
class Range(object):
    """Represents a range of integers (i.e. a start and an end)."""
    def __init__(self, start, end):
        # A Range must be non-empty: start strictly before end.
        if start >= end:
            raise ValueError('start (' + str(start) + ') must be smaller than end (' + str(end) + ')')

        self.start = start
        self.end = end
        self.size = end - start

    def __repr__(self):
        return 'Range ' + str(self.start) + '..' + str(self.end)

    def __cmp__(self, other):
        # Python 2 ordering: primarily by start, ties broken by end.
        # Scalars compare against our endpoints.
        if type(other) == Range:
            if self.start == other.start:
                return cmp(self.end, other.end)
            return cmp(self.start, other.start)

        if self.start == other:
            return cmp(self.end, other)
        return cmp(self.start, other)

    def contains(self, i):
        """True if i (a number or a Range) lies within [start, end] inclusive."""
        if type(i) == Range:
            return i.start >= self.start and i.end <= self.end

        return self.start <= i <= self.end
class Ranges(object):
    """A group of ranges.

    This class is a list with special awareness of Range objects. It
    will re-jig its contents as new Ranges are added to ensure that
    Ranges never overlap and are in order.

    For example:
      ranges = Ranges()

      ranges.add_range(Range(0, 3))
      # ranges now just contains (0, 3)

      ranges.add_range(Range(6, 10))
      # ranges = (0,3) (6,10)

      ranges.add_range(Range(7, 15))
      # ranges = (0,3) (6,15)
      # (6,10) and (7,15) overlap so they have been merged and the
      # resulting range that is covered by both is added instead

      ranges.add_range(Range(3, 5))
      # ranges = (0,5) (6,15)
      # (0,3) and (3,5) overlap, so they are merged

      ranges.add_range(Range(5, 6))
      # ranges = (0,15)
      # (5,6) overlaps with both (0,5) and (6,15) so it is merged with
      # both

      ranges.add_range(Range(15, 16))
      # ranges = (0,16)

      ranges.add_range(Range(1, 3))
      # ranges = (0,16)
      # (1,3) is already included in our range so it is effectively
      # ignored
    """
    def __init__(self):
        # Sorted, non-overlapping list of Range objects (invariant
        # maintained by _cleanup()).
        self.ranges = []

        # start and end of the entire range (ie the start point of the
        # starting range to the end point of the finishing range)
        self.start = 0
        self.end = 0

    def __repr__(self):
        return str(self.ranges)

    def _cleanup(self):
        """Re-establish the invariant: sort ranges and merge overlaps.

        NOTE(review): assumes self.ranges is non-empty (always true when
        called from add_range); also relies on Range.__cmp__, i.e. this
        sort is Python 2 only.
        """
        old_ranges = list(self.ranges)
        old_ranges.sort()

        i = 0
        while i < (len(old_ranges)-1):
            # get the next item, compare it with the item that follows it
            item = old_ranges[i]
            next_item = old_ranges[i+1]

            if item.end >= next_item.start:
                # Adjacent/overlapping pair: replace both with their union.
                old_ranges.pop(i)
                old_ranges.pop(i) # effectively this removes item at i+1

                new_range = Range(item.start, max(item.end, next_item.end))
                old_ranges.append(new_range)
                old_ranges.sort()

            else:
                # only move to the next item of the list if we didn't modify
                # the current item
                i += 1

        self.ranges = old_ranges

        # Cache the overall extent for quick overlap rejection.
        self.start = self.ranges[0].start
        self.end = self.ranges[-1].end

    def add_range(self, range):
        """Add a single Range, merging it into the existing ranges."""
        self.ranges.append(range)
        self._cleanup()
        return self

    def add_ranges(self, ranges):
        """Add each Range from an iterable; returns self for chaining."""
        for range in ranges:
            self.add_range(range)
        return self

    def contains(self, i):
        """Determines if i is contained within this list of ranges.

        if i is a number, then this will return True if it falls within
        one of the Range objects within this Ranges object.

        If i is a Range object, this will return True if it falls
        *entirely* with one of the Range objects within this Ranges
        object (i.e. its start and end are completely 'inside' or equal
        to a Range in this Ranges).
        """
        for r in self.ranges:
            if r.contains(i):
                return True

        return False

    def number(self):
        """Return the total number of units covered by all ranges."""
        num = 0
        for r in self.ranges:
            num += r.end - r.start
        return num

    def get_uncovered_portions(self, range):
        """Determine which parts of range are not covered by ranges within this Ranges object.

        For example, if I have this Ranges:
          (0,3) (5,10) (12,15)

        and I call:
          get_uncovered_portions(Range(2, 13))

        I get back a new Ranges object:
          (3,4) (10,12)

        Returns a list of Range objects (possibly empty when range is
        fully covered).
        """
        portions = []

        # if we have no ranges added then none of the given range will
        # be covered
        if not self.ranges:
            return [ range ]

        # if the search range doesn't overlap any items in this range
        # this nothing in this range will cover any of the search range
        if range.end <= self.start or range.start >= self.end:
            return [ range ]

        # Work on a copy so the caller's Range is never mutated.
        search_range = Range(range.start, range.end)
        i = 0
        while i < len(self.ranges):
            item = self.ranges[i]

            if item.contains(search_range):
                # if search_range is entirely contained, exit loop (return empty list)
                break

            elif not item.contains(search_range.start):
                # if search_range.start doesn't fall within this item,
                # either search_range begins before item.start, or after
                # item.end

                if search_range.start < item.start:

                    if search_range.end < item.start:
                        # if search_range ends before this item (ie
                        # never overlaps) then add a portion
                        # representing the entire search_range and exit
                        # the loop, since we've now gone as far as the
                        # end of the search_range
                        portions.append(Range(search_range.start, search_range.end))
                        break

                    else:
                        # if search_range begins before this item starts
                        # (and it overlaps with this item), add
                        # a portion to account for the space between
                        # search_range.start and item.start, move
                        # beginning of search_range to item.start, and
                        # re-run loop without moving to the next item

                        portions.append(Range(search_range.start, item.start))
                        search_range = Range(item.start, search_range.end)

                else:
                    # if this item does not overlap search range at all,
                    # ignore this item
                    i += 1 # move to next item

            else:
                # if overlaps, then move the search_range up to begin at
                # the end of this item (since it is 'covered' up to the
                # end of this
                # item)
                search_range = Range(item.end, search_range.end)

                # get the next item (because of _cleanup, we know that
                # there will be space between item and next_item)
                next_item = None
                if i < len(self.ranges)-1:
                    next_item = self.ranges[i+1]

                if next_item is None or search_range.end <= next_item.start:
                    # if our search_range finishes before we get to the
                    # next item, then add a portion for search_range,
                    # and exit (since we've finished)

                    portions.append(Range(search_range.start, search_range.end))
                    break

                else:
                    # if our search_range strays into the next_item
                    # range, add a portion to cover the space up to the
                    # beginning of the next item, then move the
                    # beginning of search_range up to the next item and
                    # re-run loop

                    portions.append(Range(search_range.start, next_item.start))
                    search_range = Range(next_item.start, search_range.end)

                i += 1 # move to next item

        return portions
class SimpleVirtualFile(object):
    """
    A Virtual File that allows you to specify callback functions, called
    when the file is read or changed.

    This class is generally much simpler to use than using VirtualFile
    directly and hides much of the implementation detail of FUSE.

    Note that in order to track changes properly this class will cache
    the content returned by 'callback_on_read' when a file is opened.
    This cache is discarded when the file is closed (via the FUSE
    release() function). This won't be a problem for you unless you
    intend to return a large amount of data (e.g. hundreds of MB) from
    'callback_on_read' in which case you may see performance hits or run
    out of memory. To get around this subclass VirtualFile instead of
    using SimpleVirtualFile.
    """
    def __init__(self, name, callback_on_read, callback_on_change = None):
        self.name = name

        self.callback_on_read = callback_on_read
        self.callback_on_change = callback_on_change

        # Cached content, held as a list of characters so write() can
        # splice into it; None means "not loaded yet".
        self.content = None

    def _get_content(self):
        """Return the current content as a string, loading it on first use."""
        if self.content is None:
            result = self.callback_on_read()

            # store content as a list representation of a string, so that
            # we can modify it when write() is called
            self.content = list(result)

        return ''.join(self.content)

    def is_read_only(self):
        """Determines if this file is writeable or not.

        Read-only files will never have their write() functions called
        and their content cannot be changed by any users of the
        filesystem (including root).

        Returns true if no write_function is specified.
        """
        return self.callback_on_change is None

    def read(self, size, offset):
        """Read content of this virtual file."""
        return self._get_content()[offset:offset+size]

    def size(self):
        """Returns the size of the file, for use in calls to getattr().

        Computed as the length of the current content (loading it via
        'callback_on_read' if necessary).
        """
        return len(self._get_content())

    def write(self, buf, offset):
        """
        Write content of this virtual file.

        Only used when is_read_only() returns False (i.e. a
        'callback_on_change' was supplied).

        Should return the number of bytes successfully written.
        """
        # Ensure self.content is populated
        self._get_content()

        self.content[offset:offset+len(buf)] = buf
        return len(buf)

    def truncate(self, size):
        """Truncate this virtual file.

        Only used when is_read_only() returns False.
        """
        # truncate the string
        self.content = list(self._get_content()[:size])

        return 0 # success

    def release(self):
        """Release handle to this file.

        Flushes the current content to 'callback_on_change' (when one
        was provided; read-only files have no change callback to call)
        and discards the cached content.
        """
        if self.callback_on_change is not None:
            # convert list to string and hand it to the change callback
            self.callback_on_change(self._get_content())

        # clear cache so the next open re-reads via callback_on_read
        self.content = None

    def flush(self): # pylint: disable=no-self-use
        """Flush any outstanding data waiting to be written to this virtual file.

        Only meaningful for writeable files; the default implementation
        does nothing.
        """
        return None

    def atime(self): # pylint: disable=no-self-use
        """Returns the access time of the file.

        For use in calls to getattr(). The default implementation
        returns the current system time.
        """
        return time.mktime(time.gmtime())

    def mtime(self): # pylint: disable=no-self-use
        """Returns the modification time of the file.

        For use in calls to getattr(). The default implementation
        returns the current system time.
        """
        return time.mktime(time.gmtime())

    def ctime(self): # pylint: disable=no-self-use
        """Returns the creation time of the file.

        For use in calls to getattr(). The default implementation
        returns the current system time.
        """
        return time.mktime(time.gmtime())

    def uid(self): # pylint: disable=no-self-use
        """Returns the UID that owns the file.

        The default implementation returns None, in which case the
        VirtualFileFS instance will use the UID of the user currently
        accessing the file.
        """
        return None

    def gid(self): # pylint: disable=no-self-use
        """Returns the GID that owns the file.

        The default implementation returns None, in which case the
        VirtualFileFS instance will use the GID of the user currently
        accessing the file.
        """
        return None
173 | """ 174 | self.root = root 175 | self.cacher = cacher 176 | 177 | def get_relative_path(self, path): 178 | """Returns path relative to the given root virtual folder.""" 179 | path_xpl = path.split(os.sep) 180 | if path_xpl[0] == '' and path_xpl[1] == self.root: 181 | if len(path_xpl) > 2: 182 | return os.path.join(*path_xpl[2:]) 183 | return '' 184 | return None 185 | 186 | def contains(self, path): 187 | """Returns true if the given path exists as a virtual file.""" 188 | return self.get_relative_path(path) is not None 189 | 190 | 191 | def getattr(self, path): 192 | """Retrieve attributes of a path in the VirtualFS.""" 193 | debug('VirtualFS.getattr', path) 194 | virtual_path = self.get_relative_path(path) 195 | if virtual_path is None: 196 | return E_NO_SUCH_FILE 197 | 198 | parent_path = os.sep + os.path.dirname(virtual_path) 199 | parent_is_file = stat.S_ISREG(self.cacher.getattr(parent_path).st_mode) 200 | if parent_is_file: 201 | if os.path.basename(virtual_path) not in ['cached']: 202 | return E_NO_SUCH_FILE 203 | return self.cacher.getattr(parent_path) 204 | else: 205 | a = self.cacher.getattr(os.sep + virtual_path) 206 | a.st_mode = stat.S_IFDIR | 0o777 207 | return a 208 | 209 | def readdir(self, path, offset): 210 | debug('VirtualFS.readdir', path, offset) 211 | virtual_path = self.get_relative_path(path) 212 | if virtual_path is not None: 213 | is_file = stat.S_ISREG(self.cacher.getattr(os.sep + virtual_path).st_mode) 214 | if is_file: 215 | yield fuse.Direntry('cached') 216 | else: 217 | for f in self.cacher.readdir(os.sep + virtual_path, offset): 218 | yield fuse.Direntry(f.name) 219 | yield None 220 | 221 | if path == '/': 222 | yield fuse.Direntry(self.root) 223 | 224 | def open(self, path, flags): 225 | debug('VirtualFS.open', path, flags) 226 | virtual_path = self.get_relative_path(path) 227 | if virtual_path is None: 228 | return E_NO_SUCH_FILE 229 | 230 | if os.path.basename(virtual_path) in ['cached']: 231 | return 0 232 | 233 | if not 
is_read_only_flags(flags): 234 | return E_PERM_DENIED 235 | 236 | return 0 237 | 238 | def read(self, path, size, offset): 239 | debug('VirtualFS.read', path, size, offset) 240 | virtual_path = self.get_relative_path(path) 241 | if virtual_path is None: 242 | return E_NO_SUCH_FILE 243 | 244 | parent_path = os.sep + os.path.dirname(virtual_path) 245 | parent_is_file = stat.S_ISREG(self.cacher.getattr(parent_path).st_mode) 246 | if not parent_is_file: 247 | return E_NO_SUCH_FILE 248 | 249 | basename = os.path.basename(virtual_path) 250 | if basename != 'cached': 251 | return E_NO_SUCH_FILE 252 | 253 | attr = self.cacher.getattr(parent_path) 254 | return str(self.cacher.get_cached_blocks(parent_path).number() / float(attr.st_size * attr.st_blksize)) 255 | 256 | def mknod(self, path, mode, dev): # pylint: disable=no-self-use 257 | debug('VirtualFS.mknod', path, mode, dev) 258 | # Don't allow creation of new files 259 | return E_PERM_DENIED 260 | 261 | def unlink(self, path): # pylint: disable=no-self-use 262 | debug('VirtualFS.unlink', path) 263 | # Don't allow removal of files 264 | return E_PERM_DENIED 265 | 266 | def write(self, path, buf, offset): 267 | debug('VirtualFS.write', path, buf, offset) 268 | virtual_path = self.get_relative_path(path) 269 | if virtual_path is None: 270 | return E_NO_SUCH_FILE 271 | 272 | basename = os.path.basename(virtual_path) 273 | if basename == 'cached': 274 | real_path = os.sep + os.path.dirname(virtual_path) 275 | if buf == '1': 276 | attr = self.cacher.underlying_fs.getattr(real_path) 277 | size = attr.st_size * attr.st_blksize 278 | self.cacher.read(real_path, size, 0, force_reload=True) 279 | elif buf == '0': 280 | self.cacher.remove_cached_data(real_path) 281 | else: 282 | return E_NOT_IMPL 283 | return len(buf) 284 | else: 285 | return E_NO_SUCH_FILE 286 | 287 | def truncate(self, path, size): # pylint: disable=no-self-use 288 | debug('VirtualFS.truncate', path, size) 289 | return 0 290 | 291 | def flush(self, path, fh=None): 
# pylint: disable=no-self-use, unused-argument 292 | debug('VirtualFS.flush', path) 293 | return 0 294 | 295 | def release(self, path, fh=None): # pylint: disable=no-self-use, unused-argument 296 | debug('VirtualFS.release', path) 297 | return 0 298 | 299 | 300 | def fake_stat(virtual_file): 301 | """Create fuse stat from file.""" 302 | if virtual_file is None: 303 | return E_NO_SUCH_FILE 304 | 305 | result = fuse.Stat() 306 | 307 | if virtual_file.is_read_only(): 308 | result.st_mode = stat.S_IFREG | 0o444 309 | else: 310 | result.st_mode = stat.S_IFREG | 0o644 311 | 312 | # Always 1 for now (seems to be safe for files and dirs) 313 | result.st_nlink = 1 314 | 315 | result.st_size = virtual_file.size() 316 | 317 | # Must return seconds-since-epoch timestamps 318 | result.st_atime = virtual_file.atime() 319 | result.st_mtime = virtual_file.mtime() 320 | result.st_ctime = virtual_file.ctime() 321 | 322 | # You can set these to anything, they're set by FUSE 323 | result.st_dev = 1 324 | result.st_ino = 1 325 | 326 | # GetContext() returns uid/gid of the process that 327 | # initiated the syscall currently being handled 328 | context = fuse.FuseGetContext() 329 | if virtual_file.uid() is None: 330 | result.st_uid = context['uid'] 331 | else: 332 | result.st_uid = virtual_file.uid() 333 | 334 | if virtual_file.gid() is None: 335 | result.st_gid = context['gid'] 336 | else: 337 | result.st_gid = virtual_file.gid() 338 | 339 | return result 340 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | --index-url https://pypi.python.org/simple/ 2 | 3 | -e . 
#!/usr/bin/python
"""Packaging configuration for pCacheFS (setuptools)."""
from setuptools import setup

setup(
    name='pCacheFS',
    version='0.3',
    description='Persistent Caching FUSE Filesystem',
    keywords=['fuse', 'cache'],
    author='Pierre Penninckx',
    author_email='ibizapeanut@gmail.com',
    url='http://code.google.com/p/pcachefs',
    license='Apache 2.0',

    # Installs a `pcachefs` console command backed by pcachefs.pcachefs:main.
    entry_points={
        'console_scripts': 'pcachefs=pcachefs.pcachefs:main',
    },
    packages=['pcachefs'],

    # fuse-python provides the `fuse` module used throughout the package.
    install_requires=['fuse-python'],
    extras_require={
        'dev': ['ipython'],
        'test': ['mockito', 'pytest', 'pylint']
    },
)
@pytest.fixture
def cachedir(rootdir):
    """Directory used as pCacheFS's persistent cache store."""
    dir = os.path.join(rootdir, 'cache')
    os.makedirs(dir)
    yield dir


@pytest.fixture
def mountdir(rootdir):
    """Mount point for the pCacheFS FUSE filesystem."""
    dir = os.path.join(rootdir, 'mount')
    os.makedirs(dir)
    yield dir


@pytest.fixture
def pcachefs(sourcedir, cachedir, mountdir):
    """Run pCacheFS in a child process for the duration of a test."""
    signal.signal(signal.SIGINT, signal.SIG_DFL)
    p = Process(target=main, args=(['-d', '-s', '-c', cachedir, '-t', sourcedir, mountdir],))
    p.start()
    yield
    os.kill(p.pid, signal.SIGINT)
    p.join()


def write_to_file(dirname, path, content):
    """Write content to dirname/<path...>; path is a list of components."""
    with open(os.path.join(dirname, *path), 'w') as f:
        f.write(content)
    # Needed to let pcachefs propagate changes
    time.sleep(.1)


def create_directory(dirname, path):
    """Create the directory dirname/<path...>."""
    os.makedirs(os.path.join(dirname, *path))
    # Needed to let pcachefs propagate changes
    time.sleep(.1)


def read_from_file(dirname, path):
    """Return the content of dirname/<path...>, or None when unreadable."""
    try:
        with open(os.path.join(dirname, *path), 'r') as f:
            return f.read()
    except IOError as e:
        print('Could not open', os.path.join(dirname, *path), e)
        return None


def remove_file(dirname, path):
    """Delete the file dirname/<path...>."""
    os.remove(os.path.join(dirname, *path))


class ListDir(object):
    """Sorted snapshot of a directory's files and sub-directories."""
    def __init__(self, files, dirs):
        self.files = sorted(list(files))
        self.dirs = sorted(list(dirs))

    def __repr__(self):
        # Previously ''.format(...), which always produced the empty
        # string; include the payload so assertion failures are readable.
        return '<ListDir files={0} dirs={1}>'.format(self.files, self.dirs)

    def __eq__(self, other):
        return self.dirs == other.dirs and self.files == other.files

    def __contains__(self, what):
        return what in self.files or what in self.dirs


def list_dir(dirname, path=None):
    """Return a ListDir snapshot of dirname/<path...> (path optional)."""
    files = set()
    dirs = set()
    root = os.path.join(dirname, *(path or []))
    for object in os.listdir(root):
        is_dir = stat.S_ISDIR(os.stat(os.path.join(root, object)).st_mode)
        if is_dir:
            dirs.add(object)
        else:
            files.add(object)

    return ListDir(files, dirs)
def test_create_file(pcachefs, sourcedir, mountdir):
    """A file created in the source tree becomes visible through the mount."""
    assert 'a' not in list_dir(sourcedir)
    assert 'a' not in list_dir(mountdir)
    assert read_from_file(sourcedir, ['a']) is None
    assert read_from_file(mountdir, ['a']) is None

    write_to_file(sourcedir, ['a'], '1')
    assert 'a' in list_dir(sourcedir)
    assert 'a' in list_dir(mountdir)
    assert read_from_file(sourcedir, ['a']) == '1'
    assert read_from_file(mountdir, ['a']) == '1'


def test_cached_file_not_updated(pcachefs, sourcedir, mountdir):
    """Once read through the mount, content stays cached even if the source changes."""
    write_to_file(sourcedir, ['a'], '1')
    # load in cache
    read_from_file(mountdir, ['a'])
    write_to_file(sourcedir, ['a'], '2')
    assert 'a' in list_dir(sourcedir)
    assert 'a' in list_dir(mountdir)
    assert read_from_file(sourcedir, ['a']) == '2'
    assert read_from_file(mountdir, ['a']) == '1'


def test_only_cached_file_at_read(pcachefs, sourcedir, mountdir):
    """Content is only captured at first read, not at source-write time."""
    write_to_file(sourcedir, ['a'], '1')
    write_to_file(sourcedir, ['a'], '2')
    assert 'a' in list_dir(sourcedir)
    assert 'a' in list_dir(mountdir)
    assert read_from_file(sourcedir, ['a']) == '2'
    assert read_from_file(mountdir, ['a']) == '2'


def test_create_directory(pcachefs, sourcedir, mountdir):
    """Directories created in the source tree appear through the mount."""
    assert 'a' not in list_dir(sourcedir)
    assert 'a' not in list_dir(mountdir)
    assert read_from_file(sourcedir, ['a']) is None
    assert read_from_file(mountdir, ['a']) is None

    create_directory(sourcedir, ['a'])
    assert 'a' in list_dir(sourcedir)
    assert 'a' in list_dir(mountdir)
    write_to_file(sourcedir, ['a', 'a'], '1')
    assert 'a' in list_dir(sourcedir)
    assert 'a' in list_dir(mountdir)
    assert read_from_file(sourcedir, ['a', 'a']) == '1'
    assert read_from_file(mountdir, ['a', 'a']) == '1'


def test_cached_directory_not_updated(pcachefs, sourcedir, mountdir):
    """Directory listings are cached: new source files don't appear in the mount."""
    assert 'a' not in list_dir(sourcedir)
    assert 'a' not in list_dir(mountdir)
    assert read_from_file(sourcedir, ['a']) is None
    assert read_from_file(mountdir, ['a']) is None

    create_directory(sourcedir, ['a'])
    assert 'a' in list_dir(sourcedir)
    assert 'a' in list_dir(mountdir)
    write_to_file(sourcedir, ['a', 'a'], '1')
    assert 'a' in list_dir(sourcedir, ['a'])
    assert 'a' in list_dir(mountdir, ['a'])
    assert read_from_file(sourcedir, ['a', 'a']) == '1'
    assert read_from_file(mountdir, ['a', 'a']) == '1'

    # FIXME: not consistent behavior, b does not appear in listdir
    # although we can read the file.
    write_to_file(sourcedir, ['a', 'a'], '2')
    write_to_file(sourcedir, ['a', 'b'], '3')
    assert 'a' in list_dir(sourcedir, ['a'])
    assert 'a' in list_dir(mountdir, ['a'])
    assert 'b' in list_dir(sourcedir, ['a'])
    assert 'b' not in list_dir(mountdir, ['a'])
    assert read_from_file(sourcedir, ['a', 'a']) == '2'
    assert read_from_file(mountdir, ['a', 'a']) == '1'
    assert read_from_file(sourcedir, ['a', 'b']) == '3'
    assert read_from_file(mountdir, ['a', 'b']) == '3'


def test_read_cache(pcachefs, sourcedir, mountdir):
    """The virtual .pcachefs tree exposes a per-file 'cached' fraction."""
    write_to_file(sourcedir, ['a'], '1')
    assert list_dir(mountdir) == ListDir(['a'], ['.pcachefs'])
    assert list_dir(mountdir, ['.pcachefs']) == ListDir([], ['a'])
    assert list_dir(mountdir, ['.pcachefs', 'a']) == ListDir(['cached'], [])
    assert read_from_file(mountdir, ['.pcachefs', 'a', 'cached']) == '0'
    read_from_file(mountdir, ['a'])
    assert read_from_file(mountdir, ['.pcachefs', 'a', 'cached']) == '1'
def test_reload_cache(pcachefs, sourcedir, mountdir):
    """Writing '1' to the virtual 'cached' file forces a cache reload."""
    write_to_file(sourcedir, ['a'], '1')
    # load in cache
    read_from_file(mountdir, ['a'])
    write_to_file(sourcedir, ['a'], '2')
    assert read_from_file(mountdir, ['a']) == '1'
    assert list_dir(mountdir) == ListDir(['a'], ['.pcachefs'])
    assert read_from_file(mountdir, ['.pcachefs', 'a', 'cached']) == '1'
    write_to_file(mountdir, ['.pcachefs', 'a', 'cached'], '1')
    # remove source file
    remove_file(sourcedir, ['a'])
    assert read_from_file(mountdir, ['.pcachefs', 'a', 'cached']) == '1'
    assert read_from_file(mountdir, ['a']) == '2'


def test_remove_cache(pcachefs, sourcedir, mountdir, cachedir):
    """Writing '0' to the virtual 'cached' file drops the cached data."""
    # Pass the path as a list: the helper splats its `path` argument, so
    # a bare string would be expanded character by character into
    # os.path.join (the previous call passed 'cache.data' directly).
    assert read_from_file(cachedir, ['a', 'cache.data']) is None
    write_to_file(sourcedir, ['a'], '1')
    assert read_from_file(mountdir, ['a']) == '1'
    assert read_from_file(cachedir, ['a', 'cache.data']) == '1'
    write_to_file(mountdir, ['.pcachefs', 'a', 'cached'], '0')
    assert read_from_file(cachedir, ['a', 'cache.data']) is None
    assert read_from_file(mountdir, ['a']) == '1'
    assert read_from_file(cachedir, ['a', 'cache.data']) == '1'