├── .github
│   └── workflows
│       └── python-app.yml
├── .gitignore
├── LICENSE
├── README.md
├── setup.py
├── tests
│   ├── __init__.py
│   └── test_main.py
└── wddbfs
    ├── __init__.py
    ├── cli.py
    └── main.py

--------------------------------------------------------------------------------
/.github/workflows/python-app.yml:
--------------------------------------------------------------------------------
# This workflow will install Python dependencies, run tests and lint with a single version of Python
# For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python

name: Python application

on:
  push:
    branches: [ "main" ]
  pull_request:
    branches: [ "main" ]

permissions:
  contents: read

jobs:
  build:

    runs-on: ubuntu-latest

    steps:
    - uses: actions/checkout@v3
    - name: Set up Python 3.10
      uses: actions/setup-python@v3
      with:
        python-version: "3.10"
        cache: pip
        cache-dependency-path: setup.py
    - name: Install dependencies
      run: |
        python -m pip install --upgrade pip
        pip install flake8 pytest
        pip install .
    - name: Lint with flake8
      run: |
        # stop the build if there are Python syntax errors or undefined names
        flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
        # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
        flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
    - name: Test with pytest
      run: |
        pytest

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
.pybuilder/
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version

# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock

# poetry
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
#poetry.lock

# pdm
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
#pdm.lock
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
# in version control.
# https://pdm.fming.dev/#use-with-ide
.pdm.toml

# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/

# Celery stuff
celerybeat-schedule
celerybeat.pid

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/

# pytype static type analyzer
.pytype/

# Cython debug symbols
cython_debug/

# PyCharm
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/

--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
Copyright 2024 Adam Obeng

Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
![example workflow](https://github.com/adamobeng/wddbfs/actions/workflows/python-app.yml/badge.svg)

wddbfs is a Python package which implements a webdavfs provider that exposes the contents of sqlite databases to the filesystem.

# Installation

`pip install git+https://github.com/adamobeng/wddbfs`

# Usage

```
usage: wddbfs [-h] [-c CONFIG] [--host HOST] [--port PORT] [--log-level LOG_LEVEL] [--formats FORMATS] [--timeout TIMEOUT] [--anonymous] [--username USERNAME] [--password PASSWORD] [--db-path DB_PATH [DB_PATH ...]] [--allow-abspath]

options:
  -h, --help            show this help message and exit
  -c CONFIG, --config CONFIG
                        config file path
  --host HOST
  --port PORT
  --log-level LOG_LEVEL
  --formats FORMATS
  --timeout TIMEOUT
  --anonymous           allow access without authentication
  --username USERNAME
  --password PASSWORD
  --db-path DB_PATH [DB_PATH ...]
                        paths to sqlite database files
  --allow-abspath       make it possible to access any database on the host filesystem by specifying its absolute path relative to the WebDAV root (e.g. /mount/webdav/absolute/path/on/host/fs/to/db.sqlite)

Args that start with '--' (eg. --host) can also be set in a config file (specified via -c). Config file syntax allows: key=value, flag=true, stuff=[a,b,c] (for details, see syntax at https://goo.gl/R74nmi). If an arg is specified in
more than one place, then commandline values override config file values which override defaults.
```

More information [in this blog post](https://adamobeng.com/wddbfs-mount-a-sqlite-database-as-a-filesystem/).
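# Example

A quick way to try it out is to serve a single database without authentication and fetch one of its tables over HTTP (the database path and table name below are just placeholders):

```
wddbfs --anonymous --db-path /path/to/example.sqlite --port 8080
curl http://127.0.0.1:8080/example.sqlite/my_table.csv
```

Each database appears as a directory named after its file, and each table inside it appears once per enabled format (`.csv`, `.tsv`, `.json`, `.jsonl`); mounting the share with any WebDAV client should expose the same layout.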
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
from setuptools import setup

setup(
    name="wddbfs",
    version="0.0.1",
    author="Adam Obeng",
    description="webdavfs provider which can read the contents of sqlite databases",
    entry_points={
        "console_scripts": ["wddbfs=wddbfs.cli:cli"],
    },
    packages=["wddbfs"],
    license_files=("LICENSE",),
    install_requires=[
        "cheroot",
        "wsgidav",
        "configargparse",
        "pandas",
    ],
)

--------------------------------------------------------------------------------
/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/adamobeng/wddbfs/5c68aabf647c313df7eb8c0371a5f9c3e88f7c03/tests/__init__.py

--------------------------------------------------------------------------------
/tests/test_main.py:
--------------------------------------------------------------------------------
from unittest import TestCase
from unittest.mock import patch, MagicMock


import wddbfs.main


table_contents = b"A,B,C\n1,2,3"
DBMock = MagicMock()
DBMock.name = "test.sqlite"
DBMock.table_names = ["test_table"]
DBMock.table_contents.return_value = table_contents

DBMockClass = MagicMock()
DBMockClass.return_value = DBMock
DBMockClass = DBMockClass


class TestDBResourceProvider(TestCase):
    @patch("wddbfs.main.DB", new=DBMockClass)
    def test_table_contents(self):
        rp = wddbfs.main.DBResourceProvider(db_paths=["test"])
        root = rp.get_resource_inst("/", MagicMock())
        self.assertEqual(root.get_member_names(), ("test.sqlite",))

        dbcollection = root.get_member("test.sqlite")
        self.assertEqual(
            dbcollection.get_member_names(),
            ["test_table.csv", "test_table.tsv", "test_table.json", "test_table.jsonl"],
        )

        table_artifact = dbcollection.get_member("test_table.csv")
        self.assertEqual(table_artifact.get_content(), table_contents)

--------------------------------------------------------------------------------
/wddbfs/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/adamobeng/wddbfs/5c68aabf647c313df7eb8c0371a5f9c3e88f7c03/wddbfs/__init__.py

--------------------------------------------------------------------------------
/wddbfs/cli.py:
--------------------------------------------------------------------------------
from cheroot import wsgi
from wsgidav.wsgidav_app import WsgiDAVApp
import logging
import configargparse

import wddbfs.main


def cli():
    p = configargparse.ArgParser(default_config_files=[])
    p.add(
        "-c", "--config", required=False, is_config_file=True, help="config file path"
    )
    p.add("--host", required=False, default="127.0.0.1")
    p.add("--port", required=False, default="8080")
    p.add("--log-level", required=False, default="ERROR")
    p.add(
        "--formats", required=False, default=list(wddbfs.main.TABLE_FORMATTERS.keys())
    )
    p.add("--timeout", required=False, default=0.250)
    p.add(
        "--anonymous", action="store_true", help="allow access without authentication"
    )
    p.add("--username", help="")
    p.add("--password", help="")
    p.add("--db-path", nargs="+", help="paths to sqlite database files")
    p.add(
        "--allow-abspath",
        action="store_true",
        required=False,
        default=False,
        help=(
            "make it possible to access any database on the host filesystem by specifying its absolute path relative to the "
            "WebDAV root (e.g. /mount/webdav/absolute/path/on/host/fs/to/db.sqlite)"
        ),
    )

    options = p.parse_args()

    logger = logging.getLogger("wsgidav")
    logger.propagate = True
    logger.setLevel(getattr(logging, options.log_level))
    logging.basicConfig(
        level=getattr(logging, options.log_level),
        format="%(asctime)s:%(levelname)s:%(name)s:%(message)s",
    )

    # simple_dc maps each share to {username: {"password": ...}}, or True for anonymous access
    user_mapping = {"*": {}}
    if options.username is not None and options.password is not None:
        user_mapping["*"][options.username] = {"password": options.password}
    if options.anonymous:
        user_mapping["*"] = True
    if user_mapping == {"*": {}}:
        raise Exception(
            "Either specify a username and password or pass --anonymous to allow unauthenticated access"
        )

    config = {
        "host": options.host,
        "port": int(options.port),
        "provider_mapping": {
            "/": wddbfs.main.DBResourceProvider(
                db_paths=options.db_path or [],
                formats=options.formats,
                allow_abspath=options.allow_abspath,
            ),
        },
        "simple_dc": {"user_mapping": user_mapping},
        "http_authenticator": {},
    }
    app = WsgiDAVApp(config)

    server_args = {
        "bind_addr": (config["host"], config["port"]),
        "wsgi_app": app,
        "timeout": options.timeout,
    }
    server = wsgi.Server(**server_args)
    server.start()

--------------------------------------------------------------------------------
/wddbfs/main.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
# (c) 2009-2023 Martin Wendt and contributors; see WsgiDAV https://github.com/mar10/wsgidav
# (c) 2024-2024 Adam Obeng
# Licensed under the MIT license: http://www.opensource.org/licenses/mit-license.php

# TODO
# - write support
# - add caching based on file update time

import pandas as pd
import os
import io
from urllib.parse import quote

from wsgidav import util

from wsgidav.dav_provider import DAVCollection, DAVNonCollection, DAVProvider
from wsgidav.util import join_uri
import sqlite3
import json

__docformat__ = "reStructuredText en"

_logger = util.get_module_logger(__name__)

BUFFER_SIZE = 8192


TABLE_FORMATTERS = {
    ".csv": lambda q, con, o: pd.read_sql_query(q, con).to_csv(o, index=False),
    ".tsv": lambda q, con, o: pd.read_sql_query(q, con).to_csv(
        o, sep="\t", index=False
    ),
    ".json": lambda q, con, o: pd.read_sql_query(q, con).to_json(o, orient="records"),
    ".jsonl": lambda q, con, o: o.write(
        "\n".join(
            [json.dumps(r.to_dict()) for _, r in pd.read_sql_query(q, con).iterrows()]
        ).encode("utf8")
    ),
}


class PathCollection(DAVCollection):
    """used for recursively finding a database file by absolute path on the host filesystem"""

    def __init__(self, path, environ, resource_provider, formats):
        super().__init__(path, environ)
        self.formats = formats
        self.resource_provider = resource_provider

    def get_member(self, name):
        p = join_uri(self.path, name)
        if os.path.exists(p):
            if os.path.isdir(p):
                return PathCollection(
                    p,
                    self.environ,
                    resource_provider=self.resource_provider,
                    formats=self.formats,
                )

            else:
                return DBCollection(
                    p,
                    environ=self.environ,
                    resource_provider=self.resource_provider,
                    formats=self.formats,
                )
        return None

    def get_member_names(self):
        return []


class RootCollection(DAVCollection):
    """Resolve top-level requests '/'."""

    def __init__(self, environ, resource_provider, formats=TABLE_FORMATTERS.keys()):
        self.resource_provider = resource_provider
        self.formats = formats
        super().__init__("/", environ)

    @property
    def _member_names(self):
        r = tuple(d.name for d in self.resource_provider.dbs)
        return r

    def get_member_names(self):
        r = self._member_names
        return r

    def get_member(self, name):
        if name in self._member_names:
            return DBCollection(
                path=join_uri(self.path, name),
                environ=self.environ,
                resource_provider=self.resource_provider,
                formats=self.formats,
            )
        elif self.resource_provider.allow_abspath and os.path.exists(
            join_uri("/", name)
        ):
            return PathCollection(
                join_uri("/", name),
                self.environ,
                resource_provider=self.resource_provider,
                formats=self.formats,
            )

        return None


class DBCollection(DAVCollection):
    """Top level database, contains tables"""

    # TODO: support multiple databases per file

    def __init__(
        self, path, environ, resource_provider, formats=TABLE_FORMATTERS.keys()
    ):
        self.resource_provider = resource_provider
        self.formats = formats
        super().__init__(path, environ)

    def get_display_info(self):
        return {"type": "Category type"}

    @property
    def db(self):
        return self.resource_provider.db(self.path[1:])  # remove first slash

    def get_member_names(self):
        r = [f + e for f in self.db.table_names for e in self.formats]
        # print(f'get_member_names() -> {r}')
        return r

    def get_member(self, name):
        if name in self.get_member_names():
            return TableArtifact(
                path=join_uri(self.path, name), environ=self.environ, db_collection=self
            )
        return None


class _VirtualNonCollection(DAVNonCollection):
    """Abstract base class for all non-collection resources."""

    def __init__(self, path, environ):
        super().__init__(path, environ)

    def get_content_length(self):
        return None

    def get_content_type(self):
        return None

    def get_creation_date(self):
        return None

    def get_display_name(self):
        return self.name

    def get_display_info(self):
        raise NotImplementedError

    def get_etag(self):
        return None

    def support_etag(self):
        return False

    def get_last_modified(self):
        return None

    def support_ranges(self):
        return False


# def handle_delete(self):
#     raise DAVError(HTTP_FORBIDDEN)
# def handle_move(self, destPath):
#     raise DAVError(HTTP_FORBIDDEN)
# def handle_copy(self, destPath, depthInfinity):
#     raise DAVError(HTTP_FORBIDDEN)


class TableArtifact(_VirtualNonCollection):
    """A virtual file, containing resource descriptions."""

    def __init__(self, path, environ, db_collection):
        # assert name in _artifactNames
        super().__init__(path, environ)
        _, format = os.path.splitext(path)
        self.format = format
        self.db_collection = db_collection

    def get_content_length(self):
        return len(self.get_content().read())

    def get_content_type(self):
        _, ext = os.path.splitext(self.name)
        if ext == ".json":
            return "application/json"
        else:
            return "text/plain"

    def get_display_info(self):
        return {"type": "Virtual info file"}

    def prevent_locking(self):
        return True

    def get_ref_url(self):
        return quote(self.provider.share_path + self.name)

    def get_content(self):
        name, format = os.path.splitext(self.name)
        return self.db_collection.db.table_contents(name, format=format)


class DBResourceProvider(DAVProvider):
    """
    DAV provider that serves a VirtualResource derived structure.
    """

    def __init__(
        self, db_paths=[], formats=TABLE_FORMATTERS.keys(), allow_abspath=False
    ):
        self.formats = formats
        self.db_paths = db_paths
        self.allow_abspath = allow_abspath
        super().__init__()
        db_names = [d.name for d in self.dbs]
        assert len(db_names) == len(set(db_names)), "database names must be unique"

    def db(self, local_path):
        if "/" in local_path:
            return DB("/" + local_path)
        else:
            return {d.name: d for d in self.dbs}[local_path]

    @property
    def dbs(self):
        return [DB(p) for p in self.db_paths]

    def get_resource_inst(self, path, environ):
        # _logger.info("get_resource_inst('%s')" % path)
        self._count_get_resource_inst += 1
        root = RootCollection(environ, self, formats=self.formats)
        return root.resolve("", path)


class DB:
    def __init__(self, path):
        self.path = path

    @property
    def name(self):
        return os.path.basename(self.path)

    @property
    def con(self):
        return sqlite3.connect(self.path)

    @property
    def table_names(self):
        return [
            i[0]
            for i in self.con.cursor()
            .execute("SELECT name from sqlite_master where type ='table';")
            .fetchall()
        ]

    def table_contents(self, table_name, format=".csv"):
        # checking table_name against the database's actual table names is required
        # to avoid SQL injection in the interpolated query below
        assert table_name in self.table_names
        query = f"SELECT * from {table_name}"
        o = io.BytesIO()
        TABLE_FORMATTERS[format](query, self.con, o)
        o.seek(0)
        return o
--------------------------------------------------------------------------------
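As an illustration of how these pieces fit together, the `DB` class can be exercised directly to see exactly the bytes a WebDAV client would receive for a table. This is only a sketch, not part of the package: the temporary database, table name, and values are invented for the example.

```python
import os
import sqlite3
import tempfile

from wddbfs.main import DB

# Build a throwaway SQLite database with a single table (purely illustrative).
path = os.path.join(tempfile.mkdtemp(), "example.sqlite")
con = sqlite3.connect(path)
con.execute("CREATE TABLE greetings (id INTEGER, message TEXT)")
con.execute("INSERT INTO greetings VALUES (1, 'hello')")
con.commit()
con.close()

db = DB(path)
print(db.table_names)  # ['greetings']

# table_contents renders the table with one of the TABLE_FORMATTERS and returns a
# BytesIO; this is what TableArtifact.get_content hands to WsgiDAV for greetings.csv
print(db.table_contents("greetings", format=".csv").read())  # e.g. b'id,message\n1,hello\n'
```

The same call path serves `greetings.tsv`, `greetings.json`, and `greetings.jsonl`, just with a different entry from `TABLE_FORMATTERS`.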