├── requirements.txt ├── README.md ├── .gitignore ├── httpfs.py └── parser.py /requirements.txt: -------------------------------------------------------------------------------- 1 | requests 2 | fusepy 3 | beautifulsoup4 4 | six 5 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | HTTPFS 2 | === 3 | 4 | HTTPFS is a fuse filesystem capable of mounting a typical Apache directory index as a local read-only filesystem. 5 | 6 | Requirements 7 | --- 8 | Python dependencies (works on both Python 2 and 3): 9 | - requests 10 | - fusepy 11 | - beautifulsoup4 12 | - six 13 | 14 | Usage 15 | --- 16 | ``` 17 | usage: httpfs.py [-h] [--foreground] [--debug] [--nothreads] [--no_ssl_verify] 18 | [--allow_other] [-o OPTIONS] 19 | http_resource mountpoint 20 | 21 | positional arguments: 22 | http_resource Target web directory index 23 | mountpoint Target directory 24 | 25 | optional arguments: 26 | -h, --help show this help message and exit 27 | --foreground Do not fork into background (default: False) 28 | --debug Enable debug logging (default: False) 29 | --nothreads Disable fuse threads (default: False) 30 | --no_ssl_verify Disable SSL Verification (default: False) 31 | --allow_other Allow users other than the one running the command to 32 | access the directory. (default: False) 33 | -o OPTIONS, --options OPTIONS 34 | Mount-style variant of the above options 35 | (e.g. -o no_ssl_verify,allow_other (default: ) 36 | ``` 37 | 38 | Registering mount command 39 | --- 40 | You can register this filesystem (so it can be used in fstab or with the mount command) in the following way: 41 | ```bash 42 | # Clone the repository 43 | git clone https://github.com/Kurocon/HTTPfs.git 44 | 45 | # Change into directory 46 | cd HTTPfs 47 | 48 | # Make a symbolic link to /usr/sbin/ 49 | sudo ln -s `pwd`/httpfs.py /usr/sbin/mount.httpfs 50 | ``` 51 | 52 | You should now be able to use the filesystem in the following ways: 53 | ``` 54 | # In /etc/fstab 55 | http://some.server/ /mnt/mountpoint httpfs.py allow_others 0 0 56 | 57 | # In a normal mount command 58 | sudo mount.httpfs http://some.server/ /mnt/mountpoint -o allow_others 59 | 60 | # Or using the normal mount command 61 | sudo mount -t httpfs http://some.server/ /mnt/mountpoint -o allow_others 62 | ``` 63 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Created by .ignore support plugin (hsz.mobi) 2 | ### JetBrains template 3 | # Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio and Webstorm 4 | # Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839 5 | 6 | # User-specific stuff: 7 | .idea/ 8 | 9 | ## File-based project format: 10 | *.iws 11 | 12 | ## Plugin-specific files: 13 | 14 | # IntelliJ 15 | /out/ 16 | 17 | # mpeltonen/sbt-idea plugin 18 | .idea_modules/ 19 | 20 | # JIRA plugin 21 | atlassian-ide-plugin.xml 22 | 23 | # Crashlytics plugin (for Android Studio and IntelliJ) 24 | com_crashlytics_export_strings.xml 25 | crashlytics.properties 26 | crashlytics-build.properties 27 | fabric.properties 28 | ### Python template 29 | # Byte-compiled / optimized / DLL files 30 | __pycache__/ 31 | *.py[cod] 32 | *$py.class 33 | 34 | # C extensions 35 | *.so 36 | 37 | # Distribution / packaging 38 | .Python 39 | env/ 40 | build/ 41 | develop-eggs/ 42 | dist/ 43 | downloads/ 44 | eggs/ 45 | .eggs/ 46 | lib/ 47 | lib64/ 48 | parts/ 49 | sdist/ 50 | var/ 51 | *.egg-info/ 52 | .installed.cfg 53 | *.egg 54 | 55 | # PyInstaller 56 | # Usually these files are written by a python script from a template 57 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 58 | *.manifest 59 | *.spec 60 | 61 | # Installer logs 62 | pip-log.txt 63 | pip-delete-this-directory.txt 64 | 65 | # Unit test / coverage reports 66 | htmlcov/ 67 | .tox/ 68 | .coverage 69 | .coverage.* 70 | .cache 71 | nosetests.xml 72 | coverage.xml 73 | *,cover 74 | .hypothesis/ 75 | 76 | # Translations 77 | *.mo 78 | *.pot 79 | 80 | # Django stuff: 81 | *.log 82 | local_settings.py 83 | 84 | # Flask stuff: 85 | instance/ 86 | .webassets-cache 87 | 88 | # Scrapy stuff: 89 | .scrapy 90 | 91 | # Sphinx documentation 92 | docs/_build/ 93 | 94 | # PyBuilder 95 | target/ 96 | 97 | # Jupyter Notebook 98 | .ipynb_checkpoints 99 | 100 | # pyenv 101 | .python-version 102 | 103 | # celery beat schedule file 104 | celerybeat-schedule 105 | 106 | # dotenv 107 | .env 108 | 109 | # virtualenv 110 | .venv/ 111 | venv/ 112 | ENV/ 113 | 114 | # Spyder project settings 115 | .spyderproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | -------------------------------------------------------------------------------- /httpfs.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import sys 3 | import logging 4 | 5 | import argparse 6 | import requests 7 | import six 8 | 9 | from fuse import FUSE, Operations, FuseOSError, ENOENT 10 | from parser import Directory, File 11 | 12 | 13 | class HTTPfs(Operations): 14 | def __init__(self, root, verify_ssl=True): 15 | self.root = root 16 | self.log = logging.getLogger(__name__) 17 | self.readdir_cache = {} 18 | self.attr_cache = {} 19 | self.file_cache = {} 20 | self.session = requests.Session() 21 | if not verify_ssl: 22 | self.log.warn("Disabling SSL verification!") 23 | self.session.verify = False 24 | 25 | def readdir(self, path, fh): 26 | path = path.strip("/") 27 | path = six.text_type(path) 28 | 29 | self.log.debug(u"[READDIR] Reading path {}".format(path)) 30 | if path not in self.readdir_cache.keys(): 31 | self.readdir_cache[path] = Directory(self.root, path, self.session).contents() 32 | 33 | return [x[0] for x in self.readdir_cache[path]] 34 | 35 | def read(self, path, length, offset, fh): 36 | path = path.strip("/") 37 | path = six.text_type(path) 38 | 39 | self.log.debug(u"[READ] Reading path {}, {} bytes from {}".format(path, length, offset)) 40 | if path not in self.file_cache.keys(): 41 | self.file_cache[path] = File(self.root, path, self, self.session) 42 | 43 | return self.file_cache[path].read(length, offset) 44 | 45 | def getattr(self, path, fh=None): 46 | path = path.strip("/") 47 | path = six.text_type(path) 48 | 49 | self.log.debug(u"[GETATTR] Path {}".format(path)) 50 | if path not in self.attr_cache.keys(): 51 | try: 52 | if path not in self.file_cache.keys(): 53 | self.file_cache[path] = File(self.root, path, self, self.session) 54 | self.attr_cache[path] = self.file_cache[path].attributes() 55 | except FuseOSError: 56 | self.attr_cache[path] = None 57 | raise FuseOSError(ENOENT) 58 | 59 | if self.attr_cache[path] is not None: 60 | return self.attr_cache[path] 61 | else: 62 | raise FuseOSError(ENOENT) 63 | 64 | # Disable unused operations: 65 | access = None 66 | flush = None 67 | getxattr = None 68 | listxattr = None 69 | open = None 70 | opendir = None 71 | release = None 72 | releasedir = None 73 | statfs = None 74 | 75 | 76 | if __name__ == '__main__': 77 | FORMAT = "%(created)f - %(thread)d (%(name)s) - [%(levelname)s] %(message)s" 78 | logging.basicConfig(level=logging.INFO, format=FORMAT) 79 | 80 | p = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter) 81 | p.add_argument("http_resource", help="Target web directory index") 82 | p.add_argument("mountpoint", help="Target directory") 83 | p.add_argument("--foreground", action="store_true", help="Do not fork into background") 84 | p.add_argument("--debug", action="store_true", help="Enable debug logging") 85 | p.add_argument("--nothreads", action="store_true", help="Disable fuse threads") 86 | p.add_argument("--no_ssl_verify", action="store_true", help="Disable SSL Verification") 87 | p.add_argument("--allow_other", action="store_true", help="Allow users other than the one running the command " 88 | "to access the directory.") 89 | 90 | p.add_argument("-o", "--options", type=str, default="", help="Mount-style variant of the above options " 91 | "(e.g. -o debug,allow_other") 92 | 93 | args = vars(p.parse_args(sys.argv[1:])) 94 | 95 | fsroot = six.text_type(args.pop("http_resource").strip("/")) 96 | mountpoint = args.pop("mountpoint") 97 | 98 | fuse_kwargs = { 99 | 'nothreads': True if args.pop("nothreads") else False, 100 | 'foreground': True if args.pop("foreground") else False, 101 | 'debug': True if args.pop("debug") else False, 102 | 'allow_other': True if args.pop("allow_other") else False, 103 | } 104 | 105 | o_args_list = [x.strip() for x in args.pop("o").split(",")] 106 | o_args = {} 107 | for x in o_args_list: 108 | xs = [y.strip() for y in x.split("=")] 109 | if len(xs) > 1: 110 | fuse_kwargs[xs[0]] = xs[1:] 111 | else: 112 | fuse_kwargs[x] = True 113 | 114 | if fuse_kwargs['debug']: 115 | logging.basicConfig(level=logging.DEBUG, format=FORMAT) 116 | 117 | FUSE(HTTPfs(fsroot, verify_ssl=False if args.pop("no_ssl_verify") else True), mountpoint, **fuse_kwargs) 118 | -------------------------------------------------------------------------------- /parser.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import six 3 | import time 4 | 5 | from errno import ENOENT 6 | from stat import S_IFDIR, S_IFREG 7 | from datetime import datetime 8 | from bs4 import BeautifulSoup 9 | from collections import defaultdict 10 | from fuse import FuseOSError, EIO 11 | 12 | 13 | class Directory: 14 | def __init__(self, root, path, session): 15 | self.root = root 16 | self.path = path 17 | self.session = session 18 | self.log = logging.getLogger("Directory") 19 | self.log.debug(u"[INIT] Loading directory {}/{}".format(root, path)) 20 | 21 | def contents(self): 22 | """ 23 | Give the contents of the directory 24 | :return: List of Entities that are in the directory 25 | :rtype: list 26 | """ 27 | contents = [(".", True), ("..", True)] 28 | 29 | # Do a request, and run it through an HTML parser. 30 | response = self.session.get(u"{}/{}/".format(self.root, self.path)) 31 | parsed = BeautifulSoup(response.text, 'html.parser') 32 | 33 | # Find all of the entity elements, remove the cruft 34 | for x in parsed.find_all("tr"): 35 | if x.td is not None and x.td.img['alt'] != "[PARENTDIR]": 36 | is_dir = x.td.img['alt'] == "[DIR]" 37 | contents.append((x.find_all('td')[1].a.string.strip("/"), is_dir)) 38 | 39 | return contents 40 | 41 | 42 | class File: 43 | def __init__(self, root, path, httpfs, session): 44 | self.root = root 45 | self.path = path 46 | self.session = session 47 | self.log = logging.getLogger("File") 48 | self.log.debug(u"[INIT] Loading file {}/{}".format(root, path)) 49 | self.readbuffer = defaultdict(lambda: None) 50 | 51 | # Determine if this is a directory 52 | parent_dir = "/".join(self.path.split("/")[:-1]) 53 | filename = self.path.split("/")[-1] 54 | if parent_dir not in httpfs.readdir_cache.keys(): 55 | httpfs.readdir_cache[parent_dir] = Directory(self.root, parent_dir, self.session).contents() 56 | 57 | dirs = [six.text_type(x[0]) for x in httpfs.readdir_cache[parent_dir] if x[1]] 58 | self.is_dir = (six.text_type(filename) in dirs) or six.text_type(filename) == six.text_type("") 59 | 60 | # Determine file size 61 | self.url = u"{}/{}{}".format(self.root, self.path, "/" if self.is_dir else "") 62 | self.r = self.session.head(self.url, allow_redirects=True) 63 | if self.r.status_code == 200: 64 | try: 65 | self.size = int(self.r.headers['Content-Length']) 66 | except KeyError: 67 | self.size = 0 68 | 69 | try: 70 | mtime_string = self.r.headers["Last-Modified"] 71 | self.mtime = time.mktime(datetime.strptime(mtime_string, "%a, %d %b %Y %H:%M:%S %Z").timetuple()) 72 | except KeyError: 73 | self.mtime = time.time() 74 | else: 75 | self.log.info(u"[INIT] Non-200 code while getting {}: {}".format(self.url, self.r.status_code)) 76 | self.size = 0 77 | 78 | def read(self, length, offset): 79 | """ 80 | Reads the file. 81 | :param length: The length to read 82 | :param offset: The offset to start at 83 | :return: The file's bytes 84 | """ 85 | self.log.debug(u"[READ] Reading file {}/{}".format(self.root, self.path)) 86 | url = u"{}/{}".format(self.root, self.path) 87 | 88 | # Calculate megabyte-section this offset/length is in 89 | mb_start = (offset // 1024) // 1024 90 | mb_end = ((offset + length) // 1024) // 1024 91 | offset_from_mb = (((offset // 1024) % 1024) * 1024) + (offset % 1024) 92 | self.log.debug(u"Calculated MB_Start {} MB_End {} Offset from MB: {}".format(mb_start, mb_end, offset_from_mb)) 93 | if mb_start == mb_end: 94 | self.log.debug(u"Readbuffer filled for mb_start? {}".format(self.readbuffer[mb_start] is not None)) 95 | if self.readbuffer[mb_start] is None: 96 | # Fill buffer for this MB 97 | bytesRange = u'{}-{}'.format(mb_start * 1024 * 1024, (mb_start * 1024 * 1024) + (1023 * 1024)) 98 | self.log.debug(u"Fetching byte range {}".format(bytesRange)) 99 | headers = {'range': 'bytes=' + bytesRange} 100 | r = self.session.get(url, headers=headers) 101 | if r.status_code == 200 or r.status_code == 206: 102 | self.readbuffer[mb_start] = r.content 103 | # noinspection PyTypeChecker 104 | self.log.debug(u"Read {} bytes.".format(len(self.readbuffer[mb_start]))) 105 | else: 106 | self.log.info(u"[INIT] Non-200 code while getting {}: {}".format(url, r.status_code)) 107 | raise FuseOSError(EIO) 108 | 109 | self.log.debug(u"Returning indices {} to {}".format(offset_from_mb, offset_from_mb+length)) 110 | return self.readbuffer[mb_start][offset_from_mb:offset_from_mb+length] 111 | else: 112 | self.log.debug(u"Offset/Length spanning multiple MB's. Fetching normally") 113 | # Spanning multiple MB's, just get it normally 114 | # Set range 115 | bytesRange = u'{}-{}'.format(offset, min(self.size, offset + length - 1)) 116 | self.log.debug(u"Fetching byte range {}".format(bytesRange)) 117 | headers = {'range': 'bytes=' + bytesRange} 118 | r = self.session.get(url, headers=headers) 119 | if self.r.status_code == 200 or r.status_code == 206: 120 | return r.content 121 | else: 122 | self.log.info(u"[INIT] Non-200 code while getting {}: {}".format(url, r.status_code)) 123 | raise FuseOSError(EIO) 124 | 125 | def attributes(self): 126 | self.log.debug(u"[ATTR] Attributes of file {}/{}".format(self.root, self.path)) 127 | 128 | if self.r.status_code != 200: 129 | raise FuseOSError(ENOENT) 130 | 131 | mode = (S_IFDIR | 0o777) if self.is_dir else (S_IFREG | 0o666) 132 | 133 | attrs = { 134 | 'st_atime': self.mtime, 135 | 'st_mode': mode, 136 | 'st_mtime': self.mtime, 137 | 'st_size': self.size, 138 | } 139 | 140 | if self.is_dir: 141 | attrs['st_nlink'] = 2 142 | 143 | return attrs 144 | --------------------------------------------------------------------------------