├── .github └── dependabot.yml ├── .gitignore ├── .gitmodules ├── .pre-commit-config.yaml ├── .travis.yml ├── LICENSE ├── README.md ├── bin ├── build_hashes.py ├── run_backend.py ├── run_webapp.sh ├── shutdown.py ├── start.py ├── start_website.py ├── stop.py └── update.py ├── config ├── generic.json.sample └── logging.json.sample ├── logs └── .keepdir ├── lookup ├── lookup.conf ├── run_redis.sh └── shutdown_redis.sh ├── mypy.ini ├── poetry.lock ├── pyproject.toml ├── sanejs ├── __init__.py ├── default │ ├── __init__.py │ ├── abstractmanager.py │ ├── exceptions.py │ └── helpers.py ├── query.py └── sanejs.py ├── tools └── validate_config_files.py └── website ├── __init__.py ├── logs └── .keepdir └── web ├── __init__.py └── proxied.py /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | # To get started with Dependabot version updates, you'll need to specify which 2 | # package ecosystems to update and where the package manifests are located. 3 | # Please see the documentation for all configuration options: 4 | # https://help.github.com/github/administering-a-repository/configuration-options-for-dependency-updates 5 | 6 | version: 2 7 | updates: 8 | - package-ecosystem: "pip" 9 | directory: "/" 10 | schedule: 11 | interval: "daily" 12 | - package-ecosystem: "gitsubmodule" 13 | directory: "/client/" 14 | schedule: 15 | interval: "daily" 16 | - package-ecosystem: "gitsubmodule" 17 | directory: "/cdnjs/" 18 | schedule: 19 | interval: "daily" 20 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | MANIFEST 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | .pytest_cache/ 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | local_settings.py 57 | db.sqlite3 58 | 59 | # Flask stuff: 60 | instance/ 61 | .webassets-cache 62 | 63 | # Scrapy stuff: 64 | .scrapy 65 | 66 | # Sphinx documentation 67 | docs/_build/ 68 | 69 | # PyBuilder 70 | target/ 71 | 72 | # Jupyter Notebook 73 | .ipynb_checkpoints 74 | 75 | # pyenv 76 | .python-version 77 | 78 | # celery beat schedule file 79 | celerybeat-schedule 80 | 81 | # SageMath parsed files 82 | *.sage.py 83 | 84 | # Environments 85 | .env 86 | .venv 87 | env/ 88 | venv/ 89 | ENV/ 90 | env.bak/ 91 | venv.bak/ 92 | 93 | # Spyder project settings 94 | .spyderproject 95 | .spyproject 96 | 97 | # Rope project settings 98 | .ropeproject 99 | 100 | # mkdocs documentation 101 | /site 102 | 103 | # mypy 104 | .mypy_cache/ 105 | 106 | # Local stuff 107 | *.swp 108 | *.json 109 | cdnjs_commit 110 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "cdnjs"] 2 | path = cdnjs 3 | url = https://github.com/cdnjs/cdnjs 4 | ignore = dirty 5 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | # See https://pre-commit.com for more information 2 | # See https://pre-commit.com/hooks.html for more hooks 3 | repos: 4 | - repo: https://github.com/pre-commit/pre-commit-hooks 5 | rev: v4.1.0 6 | hooks: 7 | - id: trailing-whitespace 8 | - id: end-of-file-fixer 9 | - id: check-yaml 10 | - id: check-added-large-files 11 | - repo: https://github.com/asottile/pyupgrade 12 | rev: v2.31.1 13 | hooks: 14 | - id: pyupgrade 15 | args: [--py38-plus] 16 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | 3 | python: 4 | - "3.8" 5 | - "3.8-dev" 6 | 7 | install: 8 | - pip install poetry 9 | - poetry install 10 | 11 | script: 12 | - poetry run mypy . 13 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | BSD 2-Clause License 2 | 3 | Copyright (c) 2018, CIRCL - Computer Incident Response Center Luxembourg 4 | All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | 9 | * Redistributions of source code must retain the above copyright notice, this 10 | list of conditions and the following disclaimer. 11 | 12 | * Redistributions in binary form must reproduce the above copyright notice, 13 | this list of conditions and the following disclaimer in the documentation 14 | and/or other materials provided with the distribution. 
15 | 16 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 17 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 19 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 20 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 22 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 23 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 24 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # sanejs 2 | 3 | Use CDNJS as a source to build hashes of known JS/CSS/IMG/... content used all over the internet 4 | 5 | # Big warning. Seriously. 6 | 7 | 1. Pulling the submodule downloads a ~50G git repository 8 | 2. At first checkout, it will use ~900G on your disk 9 | 10 | You've been warned. 11 | 12 | # Online service for sanejs 13 | 14 | If you don't want to install the complete server, CIRCL operates an online service (https://sanejs.circl.lu) to query sanejs. 15 | 16 | ## sanejs - online service 17 | 18 | ```bash 19 | curl https://sanejs.circl.lu/sha512 --request POST --data '{"sha512":"908a25a227d1d6dd4397ddbf8ed19d58d092edd11f7dfbe89385e1f340211aed0ef7777edae3d3c1824f410949b7b9373753b83a3178b0f656fb97424bb20bc2"}' 20 | ``` 21 | 22 | ```bash 23 | curl https://sanejs.circl.lu/library --request POST --data '{"library":"dojo"}' 24 | ``` 25 | 26 | # Installation 27 | 28 | **IMPORTANT**: Use [poetry](https://github.com/python-poetry/poetry) 29 | 30 | ## Install redis 31 | 32 | ```bash 33 | git clone https://github.com/valkey-io/valkey.git 34 | cd valkey 35 | git checkout 8.0 36 | make 37 | cd .. 38 | ``` 39 | 40 | ## Install & run SaneJS 41 | 42 | ```bash 43 | git clone https://github.com/Lookyloo/sanejs.git 44 | cd sanejs 45 | git submodule init 46 | git submodule update 47 | pushd cdnjs 48 | git checkout master 49 | popd 50 | poetry install 51 | echo SANEJS_HOME="'`pwd`'" > .env 52 | poetry shell 53 | # Starts all 54 | start.py 55 | ``` 56 | 57 | **Note**: As long as the hashes aren't loaded, every query will return the following: 58 | 59 | ```json 60 | { 61 | "error": "The hashes are not all loaded yet, try again later." 62 | } 63 | ``` 64 | 65 | When they're all loaded, the repository will be pulled on a regular basis and load the new hashes. 
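If you prefer to script the queries, the same endpoints can be used from Python. Below is a minimal sketch (assuming the third-party `requests` package is installed; it is not a dependency of this project) that mirrors the curl examples in this README and handles the "hashes not loaded yet" error shown above:

```python
import requests

# Point this at your own instance if you run sanejs locally.
SANEJS_URL = 'https://sanejs.circl.lu'

# Look up a single sha512, same as the curl example.
sha512 = '908a25a227d1d6dd4397ddbf8ed19d58d092edd11f7dfbe89385e1f340211aed0ef7777edae3d3c1824f410949b7b9373753b83a3178b0f656fb97424bb20bc2'
r = requests.post(f'{SANEJS_URL}/sha512', json={'sha512': sha512})
result = r.json()
if 'error' in result:
    # Returned as long as the hashes are not all loaded yet.
    print(result['error'])
else:
    print(result)

# Look up every known version/file of a library, same as the curl example.
r = requests.post(f'{SANEJS_URL}/library', json={'library': 'dojo'})
print(r.json())
```

For anything beyond a quick test, the [PySaneJS](https://github.com/Lookyloo/PySaneJS/) client described below wraps these calls for you.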
66 | 67 | # Curl Usage 68 | 69 | ```bash 70 | curl https://sanejs.circl.lu/sha512 --request POST --data '{"sha512":"908a25a227d1d6dd4397ddbf8ed19d58d092edd11f7dfbe89385e1f340211aed0ef7777edae3d3c1824f410949b7b9373753b83a3178b0f656fb97424bb20bc2"}' 71 | ``` 72 | 73 | ```bash 74 | curl https://sanejs.circl.lu/library --request POST --data '{"library":"dojo"}' 75 | ``` 76 | 77 | # CLI usage (from [PySaneJS](https://github.com/Lookyloo/PySaneJS/)) 78 | 79 | It is not super useful to use it like that, but you can give it a try: 80 | 81 | ```bash 82 | # You can pass a list of sha512 83 | sanejs --url http://sanejs.circl.lu --sha512 908a25a227d1d6dd4397ddbf8ed19d58d092edd11f7dfbe89385e1f340211aed0ef7777edae3d3c1824f410949b7b9373753b83a3178b0f656fb97424bb20bc2 84 | ``` 85 | 86 | ```json 87 | { 88 | "response": [ 89 | "dojo|1.11.0-rc3|resources/dnd.css", 90 | "dojo|1.9.3|resources/dnd.css", 91 | "dojo|1.8.10|resources/dnd.css", 92 | "dojo|1.10.0|resources/dnd.css", 93 | "dojo|1.9.1|resources/dnd.css", 94 | "dojo|1.10.2|resources/dnd.css", 95 | "dojo|1.9.7|resources/dnd.css", 96 | "dojo|1.8.9|resources/dnd.css", 97 | "dojo|1.10.1|resources/dnd.css", 98 | "dojo|1.11.0-rc4|resources/dnd.css", 99 | "dojo|1.8.2|resources/dnd.css", 100 | "dojo|1.10.4|resources/dnd.css", 101 | "dojo|1.8.8|resources/dnd.css", 102 | "dojo|1.9.6|resources/dnd.css", 103 | "dojo|1.8.0|resources/dnd.css", 104 | "dojo|1.11.0-rc5|resources/dnd.css", 105 | "dojo|1.8.6|resources/dnd.css", 106 | "dojo|1.9.5|resources/dnd.css", 107 | "dojo|1.8.1|resources/dnd.css", 108 | "dojo|1.10.3|resources/dnd.css", 109 | "dojo|1.8.5|resources/dnd.css", 110 | "dojo|1.8.3|resources/dnd.css", 111 | "dojo|1.9.4|resources/dnd.css", 112 | "dojo|1.9.0|resources/dnd.css", 113 | "dojo|1.9.2|resources/dnd.css", 114 | "dojo|1.11.0-rc1|resources/dnd.css", 115 | "dojo|1.8.4|resources/dnd.css", 116 | "dojo|1.8.7|resources/dnd.css", 117 | "dojo|1.11.0-rc2|resources/dnd.css" 118 | ] 119 | } 120 | ``` 121 | 122 | 123 | ```bash 124 | sanejs --url http://sanejs.circl.lu --library jquery-tools # You can pass a list of tools 125 | ``` 126 | 127 | ```json 128 | { 129 | "response": { 130 | "jquery-tools": { 131 | "1.2.0": { 132 | "jquery.tools.min.js": "f95c034c328d7c3f5bd14e0fd82a9309ab197931ff41120ca8d749036f5a773092dc0f357b190570754f5a17d7a42a71b932793a54b0ec812eef3730ddc93dc9" 133 | }, 134 | "1.2.1": { 135 | "jquery.tools.min.js": "ba386f0827c971277c3f6941c58f9dbc410f668b272201127ee38377f57a8ec37c2cb415089cb12205c6ed2c339bf6f5a7d20c6259ae1f55337154257a398204" 136 | }, 137 | "1.2.2": { 138 | "jquery.tools.min.js": "b40b56d553cb23c7fb607f31118ba7c2ae1058308795d5b0f6d42025c7aa3f9f2b5fbb3be4c8734cf6f8f2c3dd202aca79de14d7a54d448bbe34c8198b94fc96" 139 | }, 140 | "1.2.3": { 141 | "jquery.tools.min.js": "597bb3566588ba0ec2c7fce0f4449022be687878d5c04113526503a0e77b79755c33a9ba1ad6ef8232a4a51b98b7a8b287caba7db699b4374a53370fb51f859d" 142 | }, 143 | "1.2.4": { 144 | "jquery.tools.min.js": "1dbcb177bf7b28c72d3b54aa71befa5a6d91e35c1df702a1991c9df7e60aa3efcd59bbdb8fb0a61326c3ebfe046c809ea01030c3fd8de4b90668e2aee778d968" 145 | }, 146 | "1.2.5": { 147 | "jquery.tools.min.js": "d91fdfc6cb7529493182d3c7ea12eb6cb3323060434bfd4c98c95c9f223fa97cff9a9254c5655b51818491d9de9f53ba3df1b5cbd1a20ed0dce683829b75db6a" 148 | }, 149 | "1.2.6": { 150 | "jquery.tools.min.js": "f8be2202d8ff862849e19562ba93e2743027298d9fc908191ca48978458a7053c584c581f44f37b8a595ce9262fbda1b5bea83330dd3366fc2c44a172e286f96" 151 | }, 152 | "1.2.7": { 153 | "jquery.tools.min.js": 
"b15d794a0289980a2dcffe70eb5ecaf42e2a3785a3dd8324f577fae7e8f381098fa9f8f048f6f0c1029d584d618ff5a30c6112a9baa1e1809f2ffb4781373e11" 154 | } 155 | } 156 | } 157 | } 158 | ``` 159 | -------------------------------------------------------------------------------- /bin/build_hashes.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | from sanejs.sanejs import SaneJS 4 | 5 | from sanejs.default import AbstractManager, get_config 6 | import logging 7 | import logging.config 8 | 9 | logging.config.dictConfig(get_config('logging')) 10 | 11 | 12 | class SaneJSManager(AbstractManager): 13 | 14 | def __init__(self, loglevel: int=logging.INFO): 15 | super().__init__(loglevel) 16 | self.script_name = 'build_hashes' 17 | self.sanejs = SaneJS(loglevel) 18 | self.sanejs.compute_hashes(force_recache=True) 19 | 20 | def _to_run_forever(self): 21 | self.sanejs.compute_hashes() 22 | 23 | 24 | def main(): 25 | s = SaneJSManager() 26 | s.run(sleep_in_sec=3600) 27 | 28 | 29 | if __name__ == '__main__': 30 | main() 31 | -------------------------------------------------------------------------------- /bin/run_backend.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import argparse 4 | import os 5 | import time 6 | from pathlib import Path 7 | from subprocess import Popen 8 | from typing import Optional, Dict 9 | 10 | from redis import Redis 11 | from redis.exceptions import ConnectionError 12 | 13 | from sanejs.default import get_homedir, get_socket_path 14 | 15 | 16 | def check_running(name: str) -> bool: 17 | socket_path = get_socket_path(name) 18 | if not os.path.exists(socket_path): 19 | return False 20 | try: 21 | r = Redis(unix_socket_path=socket_path) 22 | return True if r.ping() else False 23 | except ConnectionError: 24 | return False 25 | 26 | 27 | def launch_lookup(storage_directory: Optional[Path]=None): 28 | if not storage_directory: 29 | storage_directory = get_homedir() 30 | if not check_running('lookup'): 31 | Popen(["./run_redis.sh"], cwd=(storage_directory / 'lookup')) 32 | 33 | 34 | def shutdown_lookup(storage_directory: Optional[Path]=None): 35 | if not storage_directory: 36 | storage_directory = get_homedir() 37 | r = Redis(unix_socket_path=get_socket_path('lookup')) 38 | r.shutdown(save=True) 39 | print('Redis lookup database shutdown.') 40 | 41 | 42 | def launch_all(): 43 | launch_lookup() 44 | 45 | 46 | def check_all(stop: bool=False): 47 | backends: Dict[str, bool] = {'lookup': False} 48 | while True: 49 | for db_name in backends.keys(): 50 | try: 51 | backends[db_name] = check_running(db_name) 52 | except Exception: 53 | backends[db_name] = False 54 | if stop: 55 | if not any(running for running in backends.values()): 56 | break 57 | else: 58 | if all(running for running in backends.values()): 59 | break 60 | for db_name, running in backends.items(): 61 | if not stop and not running: 62 | print(f"Waiting on {db_name} to start") 63 | if stop and running: 64 | print(f"Waiting on {db_name} to stop") 65 | time.sleep(1) 66 | 67 | 68 | def stop_all(): 69 | shutdown_lookup() 70 | 71 | 72 | def main(): 73 | parser = argparse.ArgumentParser(description='Manage backend DBs.') 74 | parser.add_argument("--start", action='store_true', default=False, help="Start all") 75 | parser.add_argument("--stop", action='store_true', default=False, help="Stop all") 76 | parser.add_argument("--status", action='store_true', default=True, help="Show status") 77 | args = 
parser.parse_args() 78 | 79 | if args.start: 80 | launch_all() 81 | if args.stop: 82 | stop_all() 83 | if not args.stop and args.status: 84 | check_all() 85 | 86 | 87 | if __name__ == '__main__': 88 | main() 89 | -------------------------------------------------------------------------------- /bin/run_webapp.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -e 4 | set -x 5 | 6 | FLASK_APP=flask_lookup.py flask run 7 | -------------------------------------------------------------------------------- /bin/shutdown.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import time 4 | 5 | from sanejs.default import AbstractManager 6 | 7 | 8 | def main(): 9 | AbstractManager.force_shutdown() 10 | time.sleep(5) 11 | while True: 12 | try: 13 | running = AbstractManager.is_running() 14 | except FileNotFoundError: 15 | print('Redis is already down.') 16 | break 17 | if not running: 18 | break 19 | print(running) 20 | time.sleep(5) 21 | 22 | 23 | if __name__ == '__main__': 24 | main() 25 | -------------------------------------------------------------------------------- /bin/start.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | from subprocess import Popen, run 4 | 5 | from sanejs.default import get_homedir 6 | 7 | 8 | def main(): 9 | # Just fail if the env isn't set. 10 | get_homedir() 11 | print('Start backend (redis)...') 12 | p = run(['run_backend', '--start']) 13 | p.check_returncode() 14 | print('done.') 15 | Popen(['build_hashes']) 16 | print('Start website...') 17 | Popen(['start_website']) 18 | print('done.') 19 | 20 | 21 | if __name__ == '__main__': 22 | main() 23 | -------------------------------------------------------------------------------- /bin/start_website.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import logging 4 | import logging.config 5 | from subprocess import Popen 6 | 7 | from sanejs.default import AbstractManager 8 | from sanejs.default import get_config, get_homedir 9 | 10 | logging.config.dictConfig(get_config('logging')) 11 | 12 | 13 | class Website(AbstractManager): 14 | 15 | def __init__(self, loglevel: int=logging.INFO): 16 | super().__init__(loglevel) 17 | self.script_name = 'website' 18 | self.process = self._launch_website() 19 | self.set_running() 20 | 21 | def _launch_website(self): 22 | website_dir = get_homedir() / 'website' 23 | ip = get_config('generic', 'website_listen_ip') 24 | port = get_config('generic', 'website_listen_port') 25 | return Popen(['gunicorn', '-w', '10', 26 | '--graceful-timeout', '2', '--timeout', '300', 27 | '-b', f'{ip}:{port}', 28 | '--log-level', 'info', 29 | 'web:app'], 30 | cwd=website_dir) 31 | 32 | 33 | def main(): 34 | w = Website() 35 | w.run(sleep_in_sec=10) 36 | 37 | 38 | if __name__ == '__main__': 39 | main() 40 | -------------------------------------------------------------------------------- /bin/stop.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | from subprocess import Popen, run 4 | 5 | from redis import Redis 6 | from redis.exceptions import ConnectionError 7 | 8 | from sanejs.default import get_homedir, get_socket_path 9 | 10 | 11 | def main(): 12 | get_homedir() 13 | p = Popen(['shutdown']) 14 | p.wait() 15 | try: 16 | r = 
Redis(unix_socket_path=get_socket_path('lookup'), db=1) 17 | r.delete('shutdown') 18 | print('Shutting down databases...') 19 | p_backend = run(['run_backend', '--stop']) 20 | p_backend.check_returncode() 21 | print('done.') 22 | except ConnectionError: 23 | # Already down, skip the stacktrace 24 | pass 25 | 26 | 27 | if __name__ == '__main__': 28 | main() 29 | -------------------------------------------------------------------------------- /bin/update.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import argparse 4 | import hashlib 5 | import logging 6 | import logging.config 7 | import platform 8 | import shlex 9 | import subprocess 10 | import sys 11 | from pathlib import Path 12 | 13 | from sanejs.default import get_homedir, get_config 14 | 15 | logging.config.dictConfig(get_config('logging')) 16 | 17 | 18 | def compute_hash_self(): 19 | m = hashlib.sha256() 20 | with (get_homedir() / 'bin' / 'update.py').open('rb') as f: 21 | m.update(f.read()) 22 | return m.digest() 23 | 24 | 25 | def keep_going(ignore=False): 26 | if ignore: 27 | return 28 | keep_going = input('Continue? (y/N) ') 29 | if keep_going.lower() != 'y': 30 | print('Okay, quitting.') 31 | sys.exit() 32 | 33 | 34 | def run_command(command, expect_fail: bool=False, capture_output: bool=True): 35 | args = shlex.split(command) 36 | homedir = get_homedir() 37 | process = subprocess.run(args, cwd=homedir, capture_output=capture_output) 38 | if capture_output: 39 | print(process.stdout.decode()) 40 | if process.returncode and not expect_fail: 41 | print(process.stderr.decode()) 42 | sys.exit() 43 | 44 | 45 | def check_poetry_version(): 46 | args = shlex.split("poetry self -V") 47 | homedir = get_homedir() 48 | process = subprocess.run(args, cwd=homedir, capture_output=True) 49 | poetry_version_str = process.stdout.decode() 50 | version = poetry_version_str.split()[2] 51 | version = version.strip(')') 52 | version_details = tuple(int(i) for i in version.split('.')) 53 | if version_details < (1, 2, 0): 54 | print('The project requires poetry >= 1.2.0, please update.') 55 | print('If you installed with "pip install --user poetry", run "pip install --user -U poetry"') 56 | print('If you installed via the recommended method, use "poetry self update"') 57 | print('More details: https://github.com/python-poetry/poetry#updating-poetry') 58 | sys.exit() 59 | 60 | 61 | def main(): 62 | parser = argparse.ArgumentParser(description='Pull latest release, update dependencies, update and validate the config files, update 3rd deps for the website.') 63 | parser.add_argument('--yes', default=False, action='store_true', help='Run all commands without asking.') 64 | args = parser.parse_args() 65 | 66 | old_hash = compute_hash_self() 67 | 68 | print('* Update repository.') 69 | keep_going(args.yes) 70 | run_command('git submodule init') 71 | run_command('git pull --recurse-submodules') 72 | new_hash = compute_hash_self() 73 | if old_hash != new_hash: 74 | print('Update script changed, please do "poetry run update"') 75 | sys.exit() 76 | 77 | check_poetry_version() 78 | 79 | print('* Install/update dependencies.') 80 | keep_going(args.yes) 81 | run_command('poetry install') 82 | 83 | print('* Validate configuration files.') 84 | keep_going(args.yes) 85 | run_command(f'poetry run {(Path("tools") / "validate_config_files.py").as_posix()} --check') 86 | 87 | print('* Update configuration files.') 88 | keep_going(args.yes) 89 | run_command(f'poetry run {(Path("tools") / 
"validate_config_files.py").as_posix()} --update') 90 | 91 | print('* Update third party dependencies for the website.') 92 | keep_going(args.yes) 93 | run_command(f'poetry run {(Path("tools") / "3rdparty.py").as_posix()}') 94 | 95 | print('* Restarting') 96 | keep_going(args.yes) 97 | if platform.system() == 'Windows': 98 | print('Restarting with poetry...') 99 | run_command('poetry run stop', expect_fail=True) 100 | run_command('poetry run start', capture_output=False) 101 | print('Started.') 102 | else: 103 | service = get_config('generic', 'systemd_service_name') 104 | p = subprocess.run(["systemctl", "is-active", "--quiet", service]) 105 | try: 106 | p.check_returncode() 107 | print('Restarting with systemd...') 108 | run_command(f'sudo service {service} restart') 109 | print('done.') 110 | except subprocess.CalledProcessError: 111 | print('Restarting with poetry...') 112 | run_command('poetry run stop', expect_fail=True) 113 | run_command('poetry run start', capture_output=False) 114 | print('Started.') 115 | 116 | 117 | if __name__ == '__main__': 118 | main() 119 | -------------------------------------------------------------------------------- /config/generic.json.sample: -------------------------------------------------------------------------------- 1 | { 2 | "loglevel": "INFO", 3 | "website_listen_ip": "0.0.0.0", 4 | "website_listen_port": 5007, 5 | "systemd_service_name": "sanejs", 6 | "_notes": { 7 | "loglevel": "(sanejs) Can be one of the value listed here: https://docs.python.org/3/library/logging.html#levels", 8 | "website_listen_ip": "IP Flask will listen on. Defaults to 0.0.0.0, meaning all interfaces.", 9 | "website_listen_port": "Port Flask will listen on.", 10 | "systemd_service_name": "(Optional) Name of the systemd service if your project has one." 11 | } 12 | } 13 | -------------------------------------------------------------------------------- /config/logging.json.sample: -------------------------------------------------------------------------------- 1 | { 2 | "version": 1, 3 | "disable_existing_loggers": false, 4 | "formatters": { 5 | "simple": { 6 | "format": "%(asctime)s %(name)s %(levelname)s:%(message)s" 7 | } 8 | }, 9 | "handlers": { 10 | "stdout": { 11 | "class": "logging.StreamHandler", 12 | "level": "INFO", 13 | "formatter": "simple", 14 | "stream": "ext://sys.stdout" 15 | }, 16 | "file": { 17 | "class": "logging.handlers.RotatingFileHandler", 18 | "level": "WARNING", 19 | "formatter": "simple", 20 | "filename": "logs/warning.log", 21 | "mode": "a", 22 | "maxBytes": 1000000, 23 | "backupCount": 5 24 | } 25 | }, 26 | "root": { 27 | "level": "DEBUG", 28 | "handlers": [ 29 | "stdout", 30 | "file" 31 | ] 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /logs/.keepdir: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Lookyloo/sanejs/13cf4763231f9bc899fbf12565f3225a3a77def0/logs/.keepdir -------------------------------------------------------------------------------- /lookup/lookup.conf: -------------------------------------------------------------------------------- 1 | # Redis configuration file example. 
2 | # 3 | # Note that in order to read the configuration file, Redis must be 4 | # started with the file path as first argument: 5 | # 6 | # ./redis-server /path/to/redis.conf 7 | 8 | # Note on units: when memory size is needed, it is possible to specify 9 | # it in the usual form of 1k 5GB 4M and so forth: 10 | # 11 | # 1k => 1000 bytes 12 | # 1kb => 1024 bytes 13 | # 1m => 1000000 bytes 14 | # 1mb => 1024*1024 bytes 15 | # 1g => 1000000000 bytes 16 | # 1gb => 1024*1024*1024 bytes 17 | # 18 | # units are case insensitive so 1GB 1Gb 1gB are all the same. 19 | 20 | ################################## INCLUDES ################################### 21 | 22 | # Include one or more other config files here. This is useful if you 23 | # have a standard template that goes to all Redis servers but also need 24 | # to customize a few per-server settings. Include files can include 25 | # other files, so use this wisely. 26 | # 27 | # Notice option "include" won't be rewritten by command "CONFIG REWRITE" 28 | # from admin or Redis Sentinel. Since Redis always uses the last processed 29 | # line as value of a configuration directive, you'd better put includes 30 | # at the beginning of this file to avoid overwriting config change at runtime. 31 | # 32 | # If instead you are interested in using includes to override configuration 33 | # options, it is better to use include as the last line. 34 | # 35 | # include /path/to/local.conf 36 | # include /path/to/other.conf 37 | 38 | ################################## MODULES ##################################### 39 | 40 | # Load modules at startup. If the server is not able to load modules 41 | # it will abort. It is possible to use multiple loadmodule directives. 42 | # 43 | # loadmodule /path/to/my_module.so 44 | # loadmodule /path/to/other_module.so 45 | 46 | ################################## NETWORK ##################################### 47 | 48 | # By default, if no "bind" configuration directive is specified, Redis listens 49 | # for connections from all the network interfaces available on the server. 50 | # It is possible to listen to just one or multiple selected interfaces using 51 | # the "bind" configuration directive, followed by one or more IP addresses. 52 | # 53 | # Examples: 54 | # 55 | # bind 192.168.1.100 10.0.0.1 56 | # bind 127.0.0.1 ::1 57 | # 58 | # ~~~ WARNING ~~~ If the computer running Redis is directly exposed to the 59 | # internet, binding to all the interfaces is dangerous and will expose the 60 | # instance to everybody on the internet. So by default we uncomment the 61 | # following bind directive, that will force Redis to listen only into 62 | # the IPv4 lookback interface address (this means Redis will be able to 63 | # accept connections only from clients running into the same computer it 64 | # is running). 65 | # 66 | # IF YOU ARE SURE YOU WANT YOUR INSTANCE TO LISTEN TO ALL THE INTERFACES 67 | # JUST COMMENT THE FOLLOWING LINE. 68 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 69 | bind 127.0.0.1 70 | 71 | # Protected mode is a layer of security protection, in order to avoid that 72 | # Redis instances left open on the internet are accessed and exploited. 73 | # 74 | # When protected mode is on and if: 75 | # 76 | # 1) The server is not binding explicitly to a set of addresses using the 77 | # "bind" directive. 78 | # 2) No password is configured. 
79 | # 80 | # The server only accepts connections from clients connecting from the 81 | # IPv4 and IPv6 loopback addresses 127.0.0.1 and ::1, and from Unix domain 82 | # sockets. 83 | # 84 | # By default protected mode is enabled. You should disable it only if 85 | # you are sure you want clients from other hosts to connect to Redis 86 | # even if no authentication is configured, nor a specific set of interfaces 87 | # are explicitly listed using the "bind" directive. 88 | protected-mode yes 89 | 90 | # Accept connections on the specified port, default is 6379 (IANA #815344). 91 | # If port 0 is specified Redis will not listen on a TCP socket. 92 | port 0 93 | 94 | # TCP listen() backlog. 95 | # 96 | # In high requests-per-second environments you need an high backlog in order 97 | # to avoid slow clients connections issues. Note that the Linux kernel 98 | # will silently truncate it to the value of /proc/sys/net/core/somaxconn so 99 | # make sure to raise both the value of somaxconn and tcp_max_syn_backlog 100 | # in order to get the desired effect. 101 | tcp-backlog 511 102 | 103 | # Unix socket. 104 | # 105 | # Specify the path for the Unix socket that will be used to listen for 106 | # incoming connections. There is no default, so Redis will not listen 107 | # on a unix socket when not specified. 108 | # 109 | unixsocket lookup.sock 110 | unixsocketperm 700 111 | 112 | # Close the connection after a client is idle for N seconds (0 to disable) 113 | timeout 0 114 | 115 | # TCP keepalive. 116 | # 117 | # If non-zero, use SO_KEEPALIVE to send TCP ACKs to clients in absence 118 | # of communication. This is useful for two reasons: 119 | # 120 | # 1) Detect dead peers. 121 | # 2) Take the connection alive from the point of view of network 122 | # equipment in the middle. 123 | # 124 | # On Linux, the specified value (in seconds) is the period used to send ACKs. 125 | # Note that to close the connection the double of the time is needed. 126 | # On other kernels the period depends on the kernel configuration. 127 | # 128 | # A reasonable value for this option is 300 seconds, which is the new 129 | # Redis default starting with Redis 3.2.1. 130 | tcp-keepalive 300 131 | 132 | ################################# GENERAL ##################################### 133 | 134 | # By default Redis does not run as a daemon. Use 'yes' if you need it. 135 | # Note that Redis will write a pid file in /var/run/redis.pid when daemonized. 136 | daemonize yes 137 | 138 | # If you run Redis from upstart or systemd, Redis can interact with your 139 | # supervision tree. Options: 140 | # supervised no - no supervision interaction 141 | # supervised upstart - signal upstart by putting Redis into SIGSTOP mode 142 | # supervised systemd - signal systemd by writing READY=1 to $NOTIFY_SOCKET 143 | # supervised auto - detect upstart or systemd method based on 144 | # UPSTART_JOB or NOTIFY_SOCKET environment variables 145 | # Note: these supervision methods only signal "process is ready." 146 | # They do not enable continuous liveness pings back to your supervisor. 147 | supervised no 148 | 149 | # If a pid file is specified, Redis writes it where specified at startup 150 | # and removes it at exit. 151 | # 152 | # When the server runs non daemonized, no pid file is created if none is 153 | # specified in the configuration. When the server is daemonized, the pid file 154 | # is used even if not specified, defaulting to "/var/run/redis.pid". 
155 | # 156 | # Creating a pid file is best effort: if Redis is not able to create it 157 | # nothing bad happens, the server will start and run normally. 158 | #pidfile /var/run/redis_6379.pid 159 | 160 | # Specify the server verbosity level. 161 | # This can be one of: 162 | # debug (a lot of information, useful for development/testing) 163 | # verbose (many rarely useful info, but not a mess like the debug level) 164 | # notice (moderately verbose, what you want in production probably) 165 | # warning (only very important / critical messages are logged) 166 | loglevel notice 167 | 168 | # Specify the log file name. Also the empty string can be used to force 169 | # Redis to log on the standard output. Note that if you use standard 170 | # output for logging but daemonize, logs will be sent to /dev/null 171 | logfile "lookup.log" 172 | 173 | # To enable logging to the system logger, just set 'syslog-enabled' to yes, 174 | # and optionally update the other syslog parameters to suit your needs. 175 | # syslog-enabled no 176 | 177 | # Specify the syslog identity. 178 | # syslog-ident redis 179 | 180 | # Specify the syslog facility. Must be USER or between LOCAL0-LOCAL7. 181 | # syslog-facility local0 182 | 183 | # Set the number of databases. The default database is DB 0, you can select 184 | # a different one on a per-connection basis using SELECT where 185 | # dbid is a number between 0 and 'databases'-1 186 | databases 16 187 | 188 | # By default Redis shows an ASCII art logo only when started to log to the 189 | # standard output and if the standard output is a TTY. Basically this means 190 | # that normally a logo is displayed only in interactive sessions. 191 | # 192 | # However it is possible to force the pre-4.0 behavior and always show a 193 | # ASCII art logo in startup logs by setting the following option to yes. 194 | always-show-logo yes 195 | 196 | ################################ SNAPSHOTTING ################################ 197 | # 198 | # Save the DB on disk: 199 | # 200 | # save 201 | # 202 | # Will save the DB if both the given number of seconds and the given 203 | # number of write operations against the DB occurred. 204 | # 205 | # In the example below the behaviour will be to save: 206 | # after 900 sec (15 min) if at least 1 key changed 207 | # after 300 sec (5 min) if at least 10 keys changed 208 | # after 60 sec if at least 10000 keys changed 209 | # 210 | # Note: you can disable saving completely by commenting out all "save" lines. 211 | # 212 | # It is also possible to remove all the previously configured save 213 | # points by adding a save directive with a single empty string argument 214 | # like in the following example: 215 | # 216 | # save "" 217 | 218 | #save 900 1 219 | #save 300 10 220 | #save 60 10000 221 | save "" 222 | 223 | # By default Redis will stop accepting writes if RDB snapshots are enabled 224 | # (at least one save point) and the latest background save failed. 225 | # This will make the user aware (in a hard way) that data is not persisting 226 | # on disk properly, otherwise chances are that no one will notice and some 227 | # disaster will happen. 228 | # 229 | # If the background saving process will start working again Redis will 230 | # automatically allow writes again. 231 | # 232 | # However if you have setup your proper monitoring of the Redis server 233 | # and persistence, you may want to disable this feature so that Redis will 234 | # continue to work as usual even if there are problems with disk, 235 | # permissions, and so forth. 
236 | stop-writes-on-bgsave-error yes 237 | 238 | # Compress string objects using LZF when dump .rdb databases? 239 | # For default that's set to 'yes' as it's almost always a win. 240 | # If you want to save some CPU in the saving child set it to 'no' but 241 | # the dataset will likely be bigger if you have compressible values or keys. 242 | rdbcompression yes 243 | 244 | # Since version 5 of RDB a CRC64 checksum is placed at the end of the file. 245 | # This makes the format more resistant to corruption but there is a performance 246 | # hit to pay (around 10%) when saving and loading RDB files, so you can disable it 247 | # for maximum performances. 248 | # 249 | # RDB files created with checksum disabled have a checksum of zero that will 250 | # tell the loading code to skip the check. 251 | rdbchecksum yes 252 | 253 | # The filename where to dump the DB 254 | dbfilename dump.rdb 255 | 256 | # The working directory. 257 | # 258 | # The DB will be written inside this directory, with the filename specified 259 | # above using the 'dbfilename' configuration directive. 260 | # 261 | # The Append Only File will also be created inside this directory. 262 | # 263 | # Note that you must specify a directory here, not a file name. 264 | dir ./ 265 | 266 | ################################# REPLICATION ################################# 267 | 268 | # Master-Slave replication. Use slaveof to make a Redis instance a copy of 269 | # another Redis server. A few things to understand ASAP about Redis replication. 270 | # 271 | # 1) Redis replication is asynchronous, but you can configure a master to 272 | # stop accepting writes if it appears to be not connected with at least 273 | # a given number of slaves. 274 | # 2) Redis slaves are able to perform a partial resynchronization with the 275 | # master if the replication link is lost for a relatively small amount of 276 | # time. You may want to configure the replication backlog size (see the next 277 | # sections of this file) with a sensible value depending on your needs. 278 | # 3) Replication is automatic and does not need user intervention. After a 279 | # network partition slaves automatically try to reconnect to masters 280 | # and resynchronize with them. 281 | # 282 | # slaveof 283 | 284 | # If the master is password protected (using the "requirepass" configuration 285 | # directive below) it is possible to tell the slave to authenticate before 286 | # starting the replication synchronization process, otherwise the master will 287 | # refuse the slave request. 288 | # 289 | # masterauth 290 | 291 | # When a slave loses its connection with the master, or when the replication 292 | # is still in progress, the slave can act in two different ways: 293 | # 294 | # 1) if slave-serve-stale-data is set to 'yes' (the default) the slave will 295 | # still reply to client requests, possibly with out of date data, or the 296 | # data set may just be empty if this is the first synchronization. 297 | # 298 | # 2) if slave-serve-stale-data is set to 'no' the slave will reply with 299 | # an error "SYNC with master in progress" to all the kind of commands 300 | # but to INFO and SLAVEOF. 301 | # 302 | slave-serve-stale-data yes 303 | 304 | # You can configure a slave instance to accept writes or not. 
Writing against 305 | # a slave instance may be useful to store some ephemeral data (because data 306 | # written on a slave will be easily deleted after resync with the master) but 307 | # may also cause problems if clients are writing to it because of a 308 | # misconfiguration. 309 | # 310 | # Since Redis 2.6 by default slaves are read-only. 311 | # 312 | # Note: read only slaves are not designed to be exposed to untrusted clients 313 | # on the internet. It's just a protection layer against misuse of the instance. 314 | # Still a read only slave exports by default all the administrative commands 315 | # such as CONFIG, DEBUG, and so forth. To a limited extent you can improve 316 | # security of read only slaves using 'rename-command' to shadow all the 317 | # administrative / dangerous commands. 318 | slave-read-only yes 319 | 320 | # Replication SYNC strategy: disk or socket. 321 | # 322 | # ------------------------------------------------------- 323 | # WARNING: DISKLESS REPLICATION IS EXPERIMENTAL CURRENTLY 324 | # ------------------------------------------------------- 325 | # 326 | # New slaves and reconnecting slaves that are not able to continue the replication 327 | # process just receiving differences, need to do what is called a "full 328 | # synchronization". An RDB file is transmitted from the master to the slaves. 329 | # The transmission can happen in two different ways: 330 | # 331 | # 1) Disk-backed: The Redis master creates a new process that writes the RDB 332 | # file on disk. Later the file is transferred by the parent 333 | # process to the slaves incrementally. 334 | # 2) Diskless: The Redis master creates a new process that directly writes the 335 | # RDB file to slave sockets, without touching the disk at all. 336 | # 337 | # With disk-backed replication, while the RDB file is generated, more slaves 338 | # can be queued and served with the RDB file as soon as the current child producing 339 | # the RDB file finishes its work. With diskless replication instead once 340 | # the transfer starts, new slaves arriving will be queued and a new transfer 341 | # will start when the current one terminates. 342 | # 343 | # When diskless replication is used, the master waits a configurable amount of 344 | # time (in seconds) before starting the transfer in the hope that multiple slaves 345 | # will arrive and the transfer can be parallelized. 346 | # 347 | # With slow disks and fast (large bandwidth) networks, diskless replication 348 | # works better. 349 | repl-diskless-sync no 350 | 351 | # When diskless replication is enabled, it is possible to configure the delay 352 | # the server waits in order to spawn the child that transfers the RDB via socket 353 | # to the slaves. 354 | # 355 | # This is important since once the transfer starts, it is not possible to serve 356 | # new slaves arriving, that will be queued for the next RDB transfer, so the server 357 | # waits a delay in order to let more slaves arrive. 358 | # 359 | # The delay is specified in seconds, and by default is 5 seconds. To disable 360 | # it entirely just set it to 0 seconds and the transfer will start ASAP. 361 | repl-diskless-sync-delay 5 362 | 363 | # Slaves send PINGs to server in a predefined interval. It's possible to change 364 | # this interval with the repl_ping_slave_period option. The default value is 10 365 | # seconds. 
366 | # 367 | # repl-ping-slave-period 10 368 | 369 | # The following option sets the replication timeout for: 370 | # 371 | # 1) Bulk transfer I/O during SYNC, from the point of view of slave. 372 | # 2) Master timeout from the point of view of slaves (data, pings). 373 | # 3) Slave timeout from the point of view of masters (REPLCONF ACK pings). 374 | # 375 | # It is important to make sure that this value is greater than the value 376 | # specified for repl-ping-slave-period otherwise a timeout will be detected 377 | # every time there is low traffic between the master and the slave. 378 | # 379 | # repl-timeout 60 380 | 381 | # Disable TCP_NODELAY on the slave socket after SYNC? 382 | # 383 | # If you select "yes" Redis will use a smaller number of TCP packets and 384 | # less bandwidth to send data to slaves. But this can add a delay for 385 | # the data to appear on the slave side, up to 40 milliseconds with 386 | # Linux kernels using a default configuration. 387 | # 388 | # If you select "no" the delay for data to appear on the slave side will 389 | # be reduced but more bandwidth will be used for replication. 390 | # 391 | # By default we optimize for low latency, but in very high traffic conditions 392 | # or when the master and slaves are many hops away, turning this to "yes" may 393 | # be a good idea. 394 | repl-disable-tcp-nodelay no 395 | 396 | # Set the replication backlog size. The backlog is a buffer that accumulates 397 | # slave data when slaves are disconnected for some time, so that when a slave 398 | # wants to reconnect again, often a full resync is not needed, but a partial 399 | # resync is enough, just passing the portion of data the slave missed while 400 | # disconnected. 401 | # 402 | # The bigger the replication backlog, the longer the time the slave can be 403 | # disconnected and later be able to perform a partial resynchronization. 404 | # 405 | # The backlog is only allocated once there is at least a slave connected. 406 | # 407 | # repl-backlog-size 1mb 408 | 409 | # After a master has no longer connected slaves for some time, the backlog 410 | # will be freed. The following option configures the amount of seconds that 411 | # need to elapse, starting from the time the last slave disconnected, for 412 | # the backlog buffer to be freed. 413 | # 414 | # Note that slaves never free the backlog for timeout, since they may be 415 | # promoted to masters later, and should be able to correctly "partially 416 | # resynchronize" with the slaves: hence they should always accumulate backlog. 417 | # 418 | # A value of 0 means to never release the backlog. 419 | # 420 | # repl-backlog-ttl 3600 421 | 422 | # The slave priority is an integer number published by Redis in the INFO output. 423 | # It is used by Redis Sentinel in order to select a slave to promote into a 424 | # master if the master is no longer working correctly. 425 | # 426 | # A slave with a low priority number is considered better for promotion, so 427 | # for instance if there are three slaves with priority 10, 100, 25 Sentinel will 428 | # pick the one with priority 10, that is the lowest. 429 | # 430 | # However a special priority of 0 marks the slave as not able to perform the 431 | # role of master, so a slave with priority of 0 will never be selected by 432 | # Redis Sentinel for promotion. 433 | # 434 | # By default the priority is 100. 
435 | slave-priority 100 436 | 437 | # It is possible for a master to stop accepting writes if there are less than 438 | # N slaves connected, having a lag less or equal than M seconds. 439 | # 440 | # The N slaves need to be in "online" state. 441 | # 442 | # The lag in seconds, that must be <= the specified value, is calculated from 443 | # the last ping received from the slave, that is usually sent every second. 444 | # 445 | # This option does not GUARANTEE that N replicas will accept the write, but 446 | # will limit the window of exposure for lost writes in case not enough slaves 447 | # are available, to the specified number of seconds. 448 | # 449 | # For example to require at least 3 slaves with a lag <= 10 seconds use: 450 | # 451 | # min-slaves-to-write 3 452 | # min-slaves-max-lag 10 453 | # 454 | # Setting one or the other to 0 disables the feature. 455 | # 456 | # By default min-slaves-to-write is set to 0 (feature disabled) and 457 | # min-slaves-max-lag is set to 10. 458 | 459 | # A Redis master is able to list the address and port of the attached 460 | # slaves in different ways. For example the "INFO replication" section 461 | # offers this information, which is used, among other tools, by 462 | # Redis Sentinel in order to discover slave instances. 463 | # Another place where this info is available is in the output of the 464 | # "ROLE" command of a master. 465 | # 466 | # The listed IP and address normally reported by a slave is obtained 467 | # in the following way: 468 | # 469 | # IP: The address is auto detected by checking the peer address 470 | # of the socket used by the slave to connect with the master. 471 | # 472 | # Port: The port is communicated by the slave during the replication 473 | # handshake, and is normally the port that the slave is using to 474 | # list for connections. 475 | # 476 | # However when port forwarding or Network Address Translation (NAT) is 477 | # used, the slave may be actually reachable via different IP and port 478 | # pairs. The following two options can be used by a slave in order to 479 | # report to its master a specific set of IP and port, so that both INFO 480 | # and ROLE will report those values. 481 | # 482 | # There is no need to use both the options if you need to override just 483 | # the port or the IP address. 484 | # 485 | # slave-announce-ip 5.5.5.5 486 | # slave-announce-port 1234 487 | 488 | ################################## SECURITY ################################### 489 | 490 | # Require clients to issue AUTH before processing any other 491 | # commands. This might be useful in environments in which you do not trust 492 | # others with access to the host running redis-server. 493 | # 494 | # This should stay commented out for backward compatibility and because most 495 | # people do not need auth (e.g. they run their own servers). 496 | # 497 | # Warning: since Redis is pretty fast an outside user can try up to 498 | # 150k passwords per second against a good box. This means that you should 499 | # use a very strong password otherwise it will be very easy to break. 500 | # 501 | # requirepass foobared 502 | 503 | # Command renaming. 504 | # 505 | # It is possible to change the name of dangerous commands in a shared 506 | # environment. For instance the CONFIG command may be renamed into something 507 | # hard to guess so that it will still be available for internal-use tools 508 | # but not available for general clients. 
509 | # 510 | # Example: 511 | # 512 | # rename-command CONFIG b840fc02d524045429941cc15f59e41cb7be6c52 513 | # 514 | # It is also possible to completely kill a command by renaming it into 515 | # an empty string: 516 | # 517 | # rename-command CONFIG "" 518 | # 519 | # Please note that changing the name of commands that are logged into the 520 | # AOF file or transmitted to slaves may cause problems. 521 | 522 | ################################### CLIENTS #################################### 523 | 524 | # Set the max number of connected clients at the same time. By default 525 | # this limit is set to 10000 clients, however if the Redis server is not 526 | # able to configure the process file limit to allow for the specified limit 527 | # the max number of allowed clients is set to the current file limit 528 | # minus 32 (as Redis reserves a few file descriptors for internal uses). 529 | # 530 | # Once the limit is reached Redis will close all the new connections sending 531 | # an error 'max number of clients reached'. 532 | # 533 | # maxclients 10000 534 | 535 | ############################## MEMORY MANAGEMENT ################################ 536 | 537 | # Set a memory usage limit to the specified amount of bytes. 538 | # When the memory limit is reached Redis will try to remove keys 539 | # according to the eviction policy selected (see maxmemory-policy). 540 | # 541 | # If Redis can't remove keys according to the policy, or if the policy is 542 | # set to 'noeviction', Redis will start to reply with errors to commands 543 | # that would use more memory, like SET, LPUSH, and so on, and will continue 544 | # to reply to read-only commands like GET. 545 | # 546 | # This option is usually useful when using Redis as an LRU or LFU cache, or to 547 | # set a hard memory limit for an instance (using the 'noeviction' policy). 548 | # 549 | # WARNING: If you have slaves attached to an instance with maxmemory on, 550 | # the size of the output buffers needed to feed the slaves are subtracted 551 | # from the used memory count, so that network problems / resyncs will 552 | # not trigger a loop where keys are evicted, and in turn the output 553 | # buffer of slaves is full with DELs of keys evicted triggering the deletion 554 | # of more keys, and so forth until the database is completely emptied. 555 | # 556 | # In short... if you have slaves attached it is suggested that you set a lower 557 | # limit for maxmemory so that there is some free RAM on the system for slave 558 | # output buffers (but this is not needed if the policy is 'noeviction'). 559 | # 560 | # maxmemory 561 | 562 | # MAXMEMORY POLICY: how Redis will select what to remove when maxmemory 563 | # is reached. You can select among five behaviors: 564 | # 565 | # volatile-lru -> Evict using approximated LRU among the keys with an expire set. 566 | # allkeys-lru -> Evict any key using approximated LRU. 567 | # volatile-lfu -> Evict using approximated LFU among the keys with an expire set. 568 | # allkeys-lfu -> Evict any key using approximated LFU. 569 | # volatile-random -> Remove a random key among the ones with an expire set. 570 | # allkeys-random -> Remove a random key, any key. 571 | # volatile-ttl -> Remove the key with the nearest expire time (minor TTL) 572 | # noeviction -> Don't evict anything, just return an error on write operations. 
573 | # 574 | # LRU means Least Recently Used 575 | # LFU means Least Frequently Used 576 | # 577 | # Both LRU, LFU and volatile-ttl are implemented using approximated 578 | # randomized algorithms. 579 | # 580 | # Note: with any of the above policies, Redis will return an error on write 581 | # operations, when there are no suitable keys for eviction. 582 | # 583 | # At the date of writing these commands are: set setnx setex append 584 | # incr decr rpush lpush rpushx lpushx linsert lset rpoplpush sadd 585 | # sinter sinterstore sunion sunionstore sdiff sdiffstore zadd zincrby 586 | # zunionstore zinterstore hset hsetnx hmset hincrby incrby decrby 587 | # getset mset msetnx exec sort 588 | # 589 | # The default is: 590 | # 591 | # maxmemory-policy noeviction 592 | 593 | # LRU, LFU and minimal TTL algorithms are not precise algorithms but approximated 594 | # algorithms (in order to save memory), so you can tune it for speed or 595 | # accuracy. For default Redis will check five keys and pick the one that was 596 | # used less recently, you can change the sample size using the following 597 | # configuration directive. 598 | # 599 | # The default of 5 produces good enough results. 10 Approximates very closely 600 | # true LRU but costs more CPU. 3 is faster but not very accurate. 601 | # 602 | # maxmemory-samples 5 603 | 604 | ############################# LAZY FREEING #################################### 605 | 606 | # Redis has two primitives to delete keys. One is called DEL and is a blocking 607 | # deletion of the object. It means that the server stops processing new commands 608 | # in order to reclaim all the memory associated with an object in a synchronous 609 | # way. If the key deleted is associated with a small object, the time needed 610 | # in order to execute the DEL command is very small and comparable to most other 611 | # O(1) or O(log_N) commands in Redis. However if the key is associated with an 612 | # aggregated value containing millions of elements, the server can block for 613 | # a long time (even seconds) in order to complete the operation. 614 | # 615 | # For the above reasons Redis also offers non blocking deletion primitives 616 | # such as UNLINK (non blocking DEL) and the ASYNC option of FLUSHALL and 617 | # FLUSHDB commands, in order to reclaim memory in background. Those commands 618 | # are executed in constant time. Another thread will incrementally free the 619 | # object in the background as fast as possible. 620 | # 621 | # DEL, UNLINK and ASYNC option of FLUSHALL and FLUSHDB are user-controlled. 622 | # It's up to the design of the application to understand when it is a good 623 | # idea to use one or the other. However the Redis server sometimes has to 624 | # delete keys or flush the whole database as a side effect of other operations. 625 | # Specifically Redis deletes objects independently of a user call in the 626 | # following scenarios: 627 | # 628 | # 1) On eviction, because of the maxmemory and maxmemory policy configurations, 629 | # in order to make room for new data, without going over the specified 630 | # memory limit. 631 | # 2) Because of expire: when a key with an associated time to live (see the 632 | # EXPIRE command) must be deleted from memory. 633 | # 3) Because of a side effect of a command that stores data on a key that may 634 | # already exist. For example the RENAME command may delete the old key 635 | # content when it is replaced with another one. 
Similarly SUNIONSTORE 636 | # or SORT with STORE option may delete existing keys. The SET command 637 | # itself removes any old content of the specified key in order to replace 638 | # it with the specified string. 639 | # 4) During replication, when a slave performs a full resynchronization with 640 | # its master, the content of the whole database is removed in order to 641 | # load the RDB file just transfered. 642 | # 643 | # In all the above cases the default is to delete objects in a blocking way, 644 | # like if DEL was called. However you can configure each case specifically 645 | # in order to instead release memory in a non-blocking way like if UNLINK 646 | # was called, using the following configuration directives: 647 | 648 | lazyfree-lazy-eviction no 649 | lazyfree-lazy-expire no 650 | lazyfree-lazy-server-del no 651 | slave-lazy-flush no 652 | 653 | ############################## APPEND ONLY MODE ############################### 654 | 655 | # By default Redis asynchronously dumps the dataset on disk. This mode is 656 | # good enough in many applications, but an issue with the Redis process or 657 | # a power outage may result into a few minutes of writes lost (depending on 658 | # the configured save points). 659 | # 660 | # The Append Only File is an alternative persistence mode that provides 661 | # much better durability. For instance using the default data fsync policy 662 | # (see later in the config file) Redis can lose just one second of writes in a 663 | # dramatic event like a server power outage, or a single write if something 664 | # wrong with the Redis process itself happens, but the operating system is 665 | # still running correctly. 666 | # 667 | # AOF and RDB persistence can be enabled at the same time without problems. 668 | # If the AOF is enabled on startup Redis will load the AOF, that is the file 669 | # with the better durability guarantees. 670 | # 671 | # Please check http://redis.io/topics/persistence for more information. 672 | 673 | appendonly no 674 | 675 | # The name of the append only file (default: "appendonly.aof") 676 | 677 | appendfilename "appendonly.aof" 678 | 679 | # The fsync() call tells the Operating System to actually write data on disk 680 | # instead of waiting for more data in the output buffer. Some OS will really flush 681 | # data on disk, some other OS will just try to do it ASAP. 682 | # 683 | # Redis supports three different modes: 684 | # 685 | # no: don't fsync, just let the OS flush the data when it wants. Faster. 686 | # always: fsync after every write to the append only log. Slow, Safest. 687 | # everysec: fsync only one time every second. Compromise. 688 | # 689 | # The default is "everysec", as that's usually the right compromise between 690 | # speed and data safety. It's up to you to understand if you can relax this to 691 | # "no" that will let the operating system flush the output buffer when 692 | # it wants, for better performances (but if you can live with the idea of 693 | # some data loss consider the default persistence mode that's snapshotting), 694 | # or on the contrary, use "always" that's very slow but a bit safer than 695 | # everysec. 696 | # 697 | # More details please check the following article: 698 | # http://antirez.com/post/redis-persistence-demystified.html 699 | # 700 | # If unsure, use "everysec". 
701 | 702 | # appendfsync always 703 | appendfsync everysec 704 | # appendfsync no 705 | 706 | # When the AOF fsync policy is set to always or everysec, and a background 707 | # saving process (a background save or AOF log background rewriting) is 708 | # performing a lot of I/O against the disk, in some Linux configurations 709 | # Redis may block too long on the fsync() call. Note that there is no fix for 710 | # this currently, as even performing fsync in a different thread will block 711 | # our synchronous write(2) call. 712 | # 713 | # In order to mitigate this problem it's possible to use the following option 714 | # that will prevent fsync() from being called in the main process while a 715 | # BGSAVE or BGREWRITEAOF is in progress. 716 | # 717 | # This means that while another child is saving, the durability of Redis is 718 | # the same as "appendfsync none". In practical terms, this means that it is 719 | # possible to lose up to 30 seconds of log in the worst scenario (with the 720 | # default Linux settings). 721 | # 722 | # If you have latency problems turn this to "yes". Otherwise leave it as 723 | # "no" that is the safest pick from the point of view of durability. 724 | 725 | no-appendfsync-on-rewrite no 726 | 727 | # Automatic rewrite of the append only file. 728 | # Redis is able to automatically rewrite the log file implicitly calling 729 | # BGREWRITEAOF when the AOF log size grows by the specified percentage. 730 | # 731 | # This is how it works: Redis remembers the size of the AOF file after the 732 | # latest rewrite (if no rewrite has happened since the restart, the size of 733 | # the AOF at startup is used). 734 | # 735 | # This base size is compared to the current size. If the current size is 736 | # bigger than the specified percentage, the rewrite is triggered. Also 737 | # you need to specify a minimal size for the AOF file to be rewritten, this 738 | # is useful to avoid rewriting the AOF file even if the percentage increase 739 | # is reached but it is still pretty small. 740 | # 741 | # Specify a percentage of zero in order to disable the automatic AOF 742 | # rewrite feature. 743 | 744 | auto-aof-rewrite-percentage 100 745 | auto-aof-rewrite-min-size 64mb 746 | 747 | # An AOF file may be found to be truncated at the end during the Redis 748 | # startup process, when the AOF data gets loaded back into memory. 749 | # This may happen when the system where Redis is running 750 | # crashes, especially when an ext4 filesystem is mounted without the 751 | # data=ordered option (however this can't happen when Redis itself 752 | # crashes or aborts but the operating system still works correctly). 753 | # 754 | # Redis can either exit with an error when this happens, or load as much 755 | # data as possible (the default now) and start if the AOF file is found 756 | # to be truncated at the end. The following option controls this behavior. 757 | # 758 | # If aof-load-truncated is set to yes, a truncated AOF file is loaded and 759 | # the Redis server starts emitting a log to inform the user of the event. 760 | # Otherwise if the option is set to no, the server aborts with an error 761 | # and refuses to start. When the option is set to no, the user requires 762 | # to fix the AOF file using the "redis-check-aof" utility before to restart 763 | # the server. 764 | # 765 | # Note that if the AOF file will be found to be corrupted in the middle 766 | # the server will still exit with an error. 
This option only applies when 767 | # Redis will try to read more data from the AOF file but not enough bytes 768 | # will be found. 769 | aof-load-truncated yes 770 | 771 | # When rewriting the AOF file, Redis is able to use an RDB preamble in the 772 | # AOF file for faster rewrites and recoveries. When this option is turned 773 | # on the rewritten AOF file is composed of two different stanzas: 774 | # 775 | # [RDB file][AOF tail] 776 | # 777 | # When loading Redis recognizes that the AOF file starts with the "REDIS" 778 | # string and loads the prefixed RDB file, and continues loading the AOF 779 | # tail. 780 | # 781 | # This is currently turned off by default in order to avoid the surprise 782 | # of a format change, but will at some point be used as the default. 783 | aof-use-rdb-preamble no 784 | 785 | ################################ LUA SCRIPTING ############################### 786 | 787 | # Max execution time of a Lua script in milliseconds. 788 | # 789 | # If the maximum execution time is reached Redis will log that a script is 790 | # still in execution after the maximum allowed time and will start to 791 | # reply to queries with an error. 792 | # 793 | # When a long running script exceeds the maximum execution time only the 794 | # SCRIPT KILL and SHUTDOWN NOSAVE commands are available. The first can be 795 | # used to stop a script that did not yet called write commands. The second 796 | # is the only way to shut down the server in the case a write command was 797 | # already issued by the script but the user doesn't want to wait for the natural 798 | # termination of the script. 799 | # 800 | # Set it to 0 or a negative value for unlimited execution without warnings. 801 | lua-time-limit 5000 802 | 803 | ################################ REDIS CLUSTER ############################### 804 | # 805 | # ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 806 | # WARNING EXPERIMENTAL: Redis Cluster is considered to be stable code, however 807 | # in order to mark it as "mature" we need to wait for a non trivial percentage 808 | # of users to deploy it in production. 809 | # ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 810 | # 811 | # Normal Redis instances can't be part of a Redis Cluster; only nodes that are 812 | # started as cluster nodes can. In order to start a Redis instance as a 813 | # cluster node enable the cluster support uncommenting the following: 814 | # 815 | # cluster-enabled yes 816 | 817 | # Every cluster node has a cluster configuration file. This file is not 818 | # intended to be edited by hand. It is created and updated by Redis nodes. 819 | # Every Redis Cluster node requires a different cluster configuration file. 820 | # Make sure that instances running in the same system do not have 821 | # overlapping cluster configuration file names. 822 | # 823 | # cluster-config-file nodes-6379.conf 824 | 825 | # Cluster node timeout is the amount of milliseconds a node must be unreachable 826 | # for it to be considered in failure state. 827 | # Most other internal time limits are multiple of the node timeout. 828 | # 829 | # cluster-node-timeout 15000 830 | 831 | # A slave of a failing master will avoid to start a failover if its data 832 | # looks too old. 
833 | # 834 | # There is no simple way for a slave to actually have an exact measure of 835 | # its "data age", so the following two checks are performed: 836 | # 837 | # 1) If there are multiple slaves able to failover, they exchange messages 838 | # in order to try to give an advantage to the slave with the best 839 | # replication offset (more data from the master processed). 840 | # Slaves will try to get their rank by offset, and apply to the start 841 | # of the failover a delay proportional to their rank. 842 | # 843 | # 2) Every single slave computes the time of the last interaction with 844 | # its master. This can be the last ping or command received (if the master 845 | # is still in the "connected" state), or the time that elapsed since the 846 | # disconnection with the master (if the replication link is currently down). 847 | # If the last interaction is too old, the slave will not try to failover 848 | # at all. 849 | # 850 | # The point "2" can be tuned by user. Specifically a slave will not perform 851 | # the failover if, since the last interaction with the master, the time 852 | # elapsed is greater than: 853 | # 854 | # (node-timeout * slave-validity-factor) + repl-ping-slave-period 855 | # 856 | # So for example if node-timeout is 30 seconds, and the slave-validity-factor 857 | # is 10, and assuming a default repl-ping-slave-period of 10 seconds, the 858 | # slave will not try to failover if it was not able to talk with the master 859 | # for longer than 310 seconds. 860 | # 861 | # A large slave-validity-factor may allow slaves with too old data to failover 862 | # a master, while a too small value may prevent the cluster from being able to 863 | # elect a slave at all. 864 | # 865 | # For maximum availability, it is possible to set the slave-validity-factor 866 | # to a value of 0, which means, that slaves will always try to failover the 867 | # master regardless of the last time they interacted with the master. 868 | # (However they'll always try to apply a delay proportional to their 869 | # offset rank). 870 | # 871 | # Zero is the only value able to guarantee that when all the partitions heal 872 | # the cluster will always be able to continue. 873 | # 874 | # cluster-slave-validity-factor 10 875 | 876 | # Cluster slaves are able to migrate to orphaned masters, that are masters 877 | # that are left without working slaves. This improves the cluster ability 878 | # to resist to failures as otherwise an orphaned master can't be failed over 879 | # in case of failure if it has no working slaves. 880 | # 881 | # Slaves migrate to orphaned masters only if there are still at least a 882 | # given number of other working slaves for their old master. This number 883 | # is the "migration barrier". A migration barrier of 1 means that a slave 884 | # will migrate only if there is at least 1 other working slave for its master 885 | # and so forth. It usually reflects the number of slaves you want for every 886 | # master in your cluster. 887 | # 888 | # Default is 1 (slaves migrate only if their masters remain with at least 889 | # one slave). To disable migration just set it to a very large value. 890 | # A value of 0 can be set but is useful only for debugging and dangerous 891 | # in production. 892 | # 893 | # cluster-migration-barrier 1 894 | 895 | # By default Redis Cluster nodes stop accepting queries if they detect there 896 | # is at least an hash slot uncovered (no available node is serving it). 
897 | # This way if the cluster is partially down (for example a range of hash slots 898 | # are no longer covered) all the cluster becomes, eventually, unavailable. 899 | # It automatically returns available as soon as all the slots are covered again. 900 | # 901 | # However sometimes you want the subset of the cluster which is working, 902 | # to continue to accept queries for the part of the key space that is still 903 | # covered. In order to do so, just set the cluster-require-full-coverage 904 | # option to no. 905 | # 906 | # cluster-require-full-coverage yes 907 | 908 | # This option, when set to yes, prevents slaves from trying to failover its 909 | # master during master failures. However the master can still perform a 910 | # manual failover, if forced to do so. 911 | # 912 | # This is useful in different scenarios, especially in the case of multiple 913 | # data center operations, where we want one side to never be promoted if not 914 | # in the case of a total DC failure. 915 | # 916 | # cluster-slave-no-failover no 917 | 918 | # In order to setup your cluster make sure to read the documentation 919 | # available at http://redis.io web site. 920 | 921 | ########################## CLUSTER DOCKER/NAT support ######################## 922 | 923 | # In certain deployments, Redis Cluster nodes address discovery fails, because 924 | # addresses are NAT-ted or because ports are forwarded (the typical case is 925 | # Docker and other containers). 926 | # 927 | # In order to make Redis Cluster working in such environments, a static 928 | # configuration where each node knows its public address is needed. The 929 | # following two options are used for this scope, and are: 930 | # 931 | # * cluster-announce-ip 932 | # * cluster-announce-port 933 | # * cluster-announce-bus-port 934 | # 935 | # Each instruct the node about its address, client port, and cluster message 936 | # bus port. The information is then published in the header of the bus packets 937 | # so that other nodes will be able to correctly map the address of the node 938 | # publishing the information. 939 | # 940 | # If the above options are not used, the normal Redis Cluster auto-detection 941 | # will be used instead. 942 | # 943 | # Note that when remapped, the bus port may not be at the fixed offset of 944 | # clients port + 10000, so you can specify any port and bus-port depending 945 | # on how they get remapped. If the bus-port is not set, a fixed offset of 946 | # 10000 will be used as usually. 947 | # 948 | # Example: 949 | # 950 | # cluster-announce-ip 10.1.1.5 951 | # cluster-announce-port 6379 952 | # cluster-announce-bus-port 6380 953 | 954 | ################################## SLOW LOG ################################### 955 | 956 | # The Redis Slow Log is a system to log queries that exceeded a specified 957 | # execution time. The execution time does not include the I/O operations 958 | # like talking with the client, sending the reply and so forth, 959 | # but just the time needed to actually execute the command (this is the only 960 | # stage of command execution where the thread is blocked and can not serve 961 | # other requests in the meantime). 962 | # 963 | # You can configure the slow log with two parameters: one tells Redis 964 | # what is the execution time, in microseconds, to exceed in order for the 965 | # command to get logged, and the other parameter is the length of the 966 | # slow log. When a new command is logged the oldest one is removed from the 967 | # queue of logged commands. 
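#
# Once entries start accumulating, the log can be read back at runtime with
# the standard SLOWLOG command, for example:
#
#   SLOWLOG GET 10    # fetch the 10 most recent slow entries
#   SLOWLOG LEN       # number of entries currently in the log
#   SLOWLOG RESET     # empty the log and reclaim its memory
#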
968 | 969 | # The following time is expressed in microseconds, so 1000000 is equivalent 970 | # to one second. Note that a negative number disables the slow log, while 971 | # a value of zero forces the logging of every command. 972 | slowlog-log-slower-than 10000 973 | 974 | # There is no limit to this length. Just be aware that it will consume memory. 975 | # You can reclaim memory used by the slow log with SLOWLOG RESET. 976 | slowlog-max-len 128 977 | 978 | ################################ LATENCY MONITOR ############################## 979 | 980 | # The Redis latency monitoring subsystem samples different operations 981 | # at runtime in order to collect data related to possible sources of 982 | # latency of a Redis instance. 983 | # 984 | # Via the LATENCY command this information is available to the user that can 985 | # print graphs and obtain reports. 986 | # 987 | # The system only logs operations that were performed in a time equal or 988 | # greater than the amount of milliseconds specified via the 989 | # latency-monitor-threshold configuration directive. When its value is set 990 | # to zero, the latency monitor is turned off. 991 | # 992 | # By default latency monitoring is disabled since it is mostly not needed 993 | # if you don't have latency issues, and collecting data has a performance 994 | # impact, that while very small, can be measured under big load. Latency 995 | # monitoring can easily be enabled at runtime using the command 996 | # "CONFIG SET latency-monitor-threshold " if needed. 997 | latency-monitor-threshold 0 998 | 999 | ############################# EVENT NOTIFICATION ############################## 1000 | 1001 | # Redis can notify Pub/Sub clients about events happening in the key space. 1002 | # This feature is documented at http://redis.io/topics/notifications 1003 | # 1004 | # For instance if keyspace events notification is enabled, and a client 1005 | # performs a DEL operation on key "foo" stored in the Database 0, two 1006 | # messages will be published via Pub/Sub: 1007 | # 1008 | # PUBLISH __keyspace@0__:foo del 1009 | # PUBLISH __keyevent@0__:del foo 1010 | # 1011 | # It is possible to select the events that Redis will notify among a set 1012 | # of classes. Every class is identified by a single character: 1013 | # 1014 | # K Keyspace events, published with __keyspace@__ prefix. 1015 | # E Keyevent events, published with __keyevent@__ prefix. 1016 | # g Generic commands (non-type specific) like DEL, EXPIRE, RENAME, ... 1017 | # $ String commands 1018 | # l List commands 1019 | # s Set commands 1020 | # h Hash commands 1021 | # z Sorted set commands 1022 | # x Expired events (events generated every time a key expires) 1023 | # e Evicted events (events generated when a key is evicted for maxmemory) 1024 | # A Alias for g$lshzxe, so that the "AKE" string means all the events. 1025 | # 1026 | # The "notify-keyspace-events" takes as argument a string that is composed 1027 | # of zero or multiple characters. The empty string means that notifications 1028 | # are disabled. 1029 | # 1030 | # Example: to enable list and generic events, from the point of view of the 1031 | # event name, use: 1032 | # 1033 | # notify-keyspace-events Elg 1034 | # 1035 | # Example 2: to get the stream of the expired keys subscribing to channel 1036 | # name __keyevent@0__:expired use: 1037 | # 1038 | # notify-keyspace-events Ex 1039 | # 1040 | # By default all notifications are disabled because most users don't need 1041 | # this feature and the feature has some overhead. 
Note that if you don't 1042 | # specify at least one of K or E, no events will be delivered. 1043 | notify-keyspace-events "" 1044 | 1045 | ############################### ADVANCED CONFIG ############################### 1046 | 1047 | # Hashes are encoded using a memory efficient data structure when they have a 1048 | # small number of entries, and the biggest entry does not exceed a given 1049 | # threshold. These thresholds can be configured using the following directives. 1050 | hash-max-ziplist-entries 512 1051 | hash-max-ziplist-value 64 1052 | 1053 | # Lists are also encoded in a special way to save a lot of space. 1054 | # The number of entries allowed per internal list node can be specified 1055 | # as a fixed maximum size or a maximum number of elements. 1056 | # For a fixed maximum size, use -5 through -1, meaning: 1057 | # -5: max size: 64 Kb <-- not recommended for normal workloads 1058 | # -4: max size: 32 Kb <-- not recommended 1059 | # -3: max size: 16 Kb <-- probably not recommended 1060 | # -2: max size: 8 Kb <-- good 1061 | # -1: max size: 4 Kb <-- good 1062 | # Positive numbers mean store up to _exactly_ that number of elements 1063 | # per list node. 1064 | # The highest performing option is usually -2 (8 Kb size) or -1 (4 Kb size), 1065 | # but if your use case is unique, adjust the settings as necessary. 1066 | list-max-ziplist-size -2 1067 | 1068 | # Lists may also be compressed. 1069 | # Compress depth is the number of quicklist ziplist nodes from *each* side of 1070 | # the list to *exclude* from compression. The head and tail of the list 1071 | # are always uncompressed for fast push/pop operations. Settings are: 1072 | # 0: disable all list compression 1073 | # 1: depth 1 means "don't start compressing until after 1 node into the list, 1074 | # going from either the head or tail" 1075 | # So: [head]->node->node->...->node->[tail] 1076 | # [head], [tail] will always be uncompressed; inner nodes will compress. 1077 | # 2: [head]->[next]->node->node->...->node->[prev]->[tail] 1078 | # 2 here means: don't compress head or head->next or tail->prev or tail, 1079 | # but compress all nodes between them. 1080 | # 3: [head]->[next]->[next]->node->node->...->node->[prev]->[prev]->[tail] 1081 | # etc. 1082 | list-compress-depth 0 1083 | 1084 | # Sets have a special encoding in just one case: when a set is composed 1085 | # of just strings that happen to be integers in radix 10 in the range 1086 | # of 64 bit signed integers. 1087 | # The following configuration setting sets the limit in the size of the 1088 | # set in order to use this special memory saving encoding. 1089 | set-max-intset-entries 512 1090 | 1091 | # Similarly to hashes and lists, sorted sets are also specially encoded in 1092 | # order to save a lot of space. This encoding is only used when the length and 1093 | # elements of a sorted set are below the following limits: 1094 | zset-max-ziplist-entries 128 1095 | zset-max-ziplist-value 64 1096 | 1097 | # HyperLogLog sparse representation bytes limit. The limit includes the 1098 | # 16 bytes header. When an HyperLogLog using the sparse representation crosses 1099 | # this limit, it is converted into the dense representation. 1100 | # 1101 | # A value greater than 16000 is totally useless, since at that point the 1102 | # dense representation is more memory efficient. 
1103 | # 1104 | # The suggested value is ~ 3000 in order to have the benefits of 1105 | # the space efficient encoding without slowing down too much PFADD, 1106 | # which is O(N) with the sparse encoding. The value can be raised to 1107 | # ~ 10000 when CPU is not a concern, but space is, and the data set is 1108 | # composed of many HyperLogLogs with cardinality in the 0 - 15000 range. 1109 | hll-sparse-max-bytes 3000 1110 | 1111 | # Active rehashing uses 1 millisecond every 100 milliseconds of CPU time in 1112 | # order to help rehashing the main Redis hash table (the one mapping top-level 1113 | # keys to values). The hash table implementation Redis uses (see dict.c) 1114 | # performs a lazy rehashing: the more operation you run into a hash table 1115 | # that is rehashing, the more rehashing "steps" are performed, so if the 1116 | # server is idle the rehashing is never complete and some more memory is used 1117 | # by the hash table. 1118 | # 1119 | # The default is to use this millisecond 10 times every second in order to 1120 | # actively rehash the main dictionaries, freeing memory when possible. 1121 | # 1122 | # If unsure: 1123 | # use "activerehashing no" if you have hard latency requirements and it is 1124 | # not a good thing in your environment that Redis can reply from time to time 1125 | # to queries with 2 milliseconds delay. 1126 | # 1127 | # use "activerehashing yes" if you don't have such hard requirements but 1128 | # want to free memory asap when possible. 1129 | activerehashing yes 1130 | 1131 | # The client output buffer limits can be used to force disconnection of clients 1132 | # that are not reading data from the server fast enough for some reason (a 1133 | # common reason is that a Pub/Sub client can't consume messages as fast as the 1134 | # publisher can produce them). 1135 | # 1136 | # The limit can be set differently for the three different classes of clients: 1137 | # 1138 | # normal -> normal clients including MONITOR clients 1139 | # slave -> slave clients 1140 | # pubsub -> clients subscribed to at least one pubsub channel or pattern 1141 | # 1142 | # The syntax of every client-output-buffer-limit directive is the following: 1143 | # 1144 | # client-output-buffer-limit 1145 | # 1146 | # A client is immediately disconnected once the hard limit is reached, or if 1147 | # the soft limit is reached and remains reached for the specified number of 1148 | # seconds (continuously). 1149 | # So for instance if the hard limit is 32 megabytes and the soft limit is 1150 | # 16 megabytes / 10 seconds, the client will get disconnected immediately 1151 | # if the size of the output buffers reach 32 megabytes, but will also get 1152 | # disconnected if the client reaches 16 megabytes and continuously overcomes 1153 | # the limit for 10 seconds. 1154 | # 1155 | # By default normal clients are not limited because they don't receive data 1156 | # without asking (in a push way), but just after a request, so only 1157 | # asynchronous clients may create a scenario where data is requested faster 1158 | # than it can read. 1159 | # 1160 | # Instead there is a default limit for pubsub and slave clients, since 1161 | # subscribers and slaves receive data in a push fashion. 1162 | # 1163 | # Both the hard or the soft limit can be disabled by setting them to zero. 1164 | client-output-buffer-limit normal 0 0 0 1165 | client-output-buffer-limit slave 256mb 64mb 60 1166 | client-output-buffer-limit pubsub 32mb 8mb 60 1167 | 1168 | # Client query buffers accumulate new commands. 
They are limited to a fixed 1169 | # amount by default in order to avoid that a protocol desynchronization (for 1170 | # instance due to a bug in the client) will lead to unbound memory usage in 1171 | # the query buffer. However you can configure it here if you have very special 1172 | # needs, such us huge multi/exec requests or alike. 1173 | # 1174 | # client-query-buffer-limit 1gb 1175 | 1176 | # In the Redis protocol, bulk requests, that are, elements representing single 1177 | # strings, are normally limited ot 512 mb. However you can change this limit 1178 | # here. 1179 | # 1180 | # proto-max-bulk-len 512mb 1181 | 1182 | # Redis calls an internal function to perform many background tasks, like 1183 | # closing connections of clients in timeout, purging expired keys that are 1184 | # never requested, and so forth. 1185 | # 1186 | # Not all tasks are performed with the same frequency, but Redis checks for 1187 | # tasks to perform according to the specified "hz" value. 1188 | # 1189 | # By default "hz" is set to 10. Raising the value will use more CPU when 1190 | # Redis is idle, but at the same time will make Redis more responsive when 1191 | # there are many keys expiring at the same time, and timeouts may be 1192 | # handled with more precision. 1193 | # 1194 | # The range is between 1 and 500, however a value over 100 is usually not 1195 | # a good idea. Most users should use the default of 10 and raise this up to 1196 | # 100 only in environments where very low latency is required. 1197 | hz 10 1198 | 1199 | # When a child rewrites the AOF file, if the following option is enabled 1200 | # the file will be fsync-ed every 32 MB of data generated. This is useful 1201 | # in order to commit the file to the disk more incrementally and avoid 1202 | # big latency spikes. 1203 | aof-rewrite-incremental-fsync yes 1204 | 1205 | # Redis LFU eviction (see maxmemory setting) can be tuned. However it is a good 1206 | # idea to start with the default settings and only change them after investigating 1207 | # how to improve the performances and how the keys LFU change over time, which 1208 | # is possible to inspect via the OBJECT FREQ command. 1209 | # 1210 | # There are two tunable parameters in the Redis LFU implementation: the 1211 | # counter logarithm factor and the counter decay time. It is important to 1212 | # understand what the two parameters mean before changing them. 1213 | # 1214 | # The LFU counter is just 8 bits per key, it's maximum value is 255, so Redis 1215 | # uses a probabilistic increment with logarithmic behavior. Given the value 1216 | # of the old counter, when a key is accessed, the counter is incremented in 1217 | # this way: 1218 | # 1219 | # 1. A random number R between 0 and 1 is extracted. 1220 | # 2. A probability P is calculated as 1/(old_value*lfu_log_factor+1). 1221 | # 3. The counter is incremented only if R < P. 1222 | # 1223 | # The default lfu-log-factor is 10. 
This is a table of how the frequency 1224 | # counter changes with a different number of accesses with different 1225 | # logarithmic factors: 1226 | # 1227 | # +--------+------------+------------+------------+------------+------------+ 1228 | # | factor | 100 hits | 1000 hits | 100K hits | 1M hits | 10M hits | 1229 | # +--------+------------+------------+------------+------------+------------+ 1230 | # | 0 | 104 | 255 | 255 | 255 | 255 | 1231 | # +--------+------------+------------+------------+------------+------------+ 1232 | # | 1 | 18 | 49 | 255 | 255 | 255 | 1233 | # +--------+------------+------------+------------+------------+------------+ 1234 | # | 10 | 10 | 18 | 142 | 255 | 255 | 1235 | # +--------+------------+------------+------------+------------+------------+ 1236 | # | 100 | 8 | 11 | 49 | 143 | 255 | 1237 | # +--------+------------+------------+------------+------------+------------+ 1238 | # 1239 | # NOTE: The above table was obtained by running the following commands: 1240 | # 1241 | # redis-benchmark -n 1000000 incr foo 1242 | # redis-cli object freq foo 1243 | # 1244 | # NOTE 2: The counter initial value is 5 in order to give new objects a chance 1245 | # to accumulate hits. 1246 | # 1247 | # The counter decay time is the time, in minutes, that must elapse in order 1248 | # for the key counter to be divided by two (or decremented if it has a value 1249 | # less <= 10). 1250 | # 1251 | # The default value for the lfu-decay-time is 1. A Special value of 0 means to 1252 | # decay the counter every time it happens to be scanned. 1253 | # 1254 | # lfu-log-factor 10 1255 | # lfu-decay-time 1 1256 | 1257 | ########################### ACTIVE DEFRAGMENTATION ####################### 1258 | # 1259 | # WARNING THIS FEATURE IS EXPERIMENTAL. However it was stress tested 1260 | # even in production and manually tested by multiple engineers for some 1261 | # time. 1262 | # 1263 | # What is active defragmentation? 1264 | # ------------------------------- 1265 | # 1266 | # Active (online) defragmentation allows a Redis server to compact the 1267 | # spaces left between small allocations and deallocations of data in memory, 1268 | # thus allowing to reclaim back memory. 1269 | # 1270 | # Fragmentation is a natural process that happens with every allocator (but 1271 | # less so with Jemalloc, fortunately) and certain workloads. Normally a server 1272 | # restart is needed in order to lower the fragmentation, or at least to flush 1273 | # away all the data and create it again. However thanks to this feature 1274 | # implemented by Oran Agra for Redis 4.0 this process can happen at runtime 1275 | # in an "hot" way, while the server is running. 1276 | # 1277 | # Basically when the fragmentation is over a certain level (see the 1278 | # configuration options below) Redis will start to create new copies of the 1279 | # values in contiguous memory regions by exploiting certain specific Jemalloc 1280 | # features (in order to understand if an allocation is causing fragmentation 1281 | # and to allocate it in a better place), and at the same time, will release the 1282 | # old copies of the data. This process, repeated incrementally for all the keys 1283 | # will cause the fragmentation to drop back to normal values. 1284 | # 1285 | # Important things to understand: 1286 | # 1287 | # 1. This feature is disabled by default, and only works if you compiled Redis 1288 | # to use the copy of Jemalloc we ship with the source code of Redis. 1289 | # This is the default with Linux builds. 
1290 | # 1291 | # 2. You never need to enable this feature if you don't have fragmentation 1292 | # issues. 1293 | # 1294 | # 3. Once you experience fragmentation, you can enable this feature when 1295 | # needed with the command "CONFIG SET activedefrag yes". 1296 | # 1297 | # The configuration parameters are able to fine tune the behavior of the 1298 | # defragmentation process. If you are not sure about what they mean it is 1299 | # a good idea to leave the defaults untouched. 1300 | 1301 | # Enabled active defragmentation 1302 | # activedefrag yes 1303 | 1304 | # Minimum amount of fragmentation waste to start active defrag 1305 | # active-defrag-ignore-bytes 100mb 1306 | 1307 | # Minimum percentage of fragmentation to start active defrag 1308 | # active-defrag-threshold-lower 10 1309 | 1310 | # Maximum percentage of fragmentation at which we use maximum effort 1311 | # active-defrag-threshold-upper 100 1312 | 1313 | # Minimal effort for defrag in CPU percentage 1314 | # active-defrag-cycle-min 25 1315 | 1316 | # Maximal effort for defrag in CPU percentage 1317 | # active-defrag-cycle-max 75 1318 | -------------------------------------------------------------------------------- /lookup/run_redis.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | # set -x 5 | 6 | if [ -f ../../valkey/src/valkey-server ]; then 7 | if [[ ` ../../valkey/src/valkey-server -v` == *"v=7."* ]] ; then 8 | echo "You're using valkey 7, please upgrade do valkey 8" 9 | exit 1 10 | fi 11 | ../../valkey/src/valkey-server ./lookup.conf 12 | elif [ -f ../../redis/src/redis-server ]; then 13 | if [[ ` ../../redis/src/redis-server -v` == *"v=7."* ]] ; then 14 | echo "You're using redis 7, please upgrade do valkey 8"; 15 | exit 1 16 | fi 17 | ../../redis/src/redis-server ./lookup.conf 18 | else 19 | if [[ `/usr/bin/redis-server -v` == *"v=7."* ]] ; then 20 | echo "You're using redis 7, please upgrade do valkey 8"; 21 | exit 1 22 | fi 23 | echo "Warning: using system redis-server. Valkey-server or redis-server from source is recommended." >&2 24 | /usr/bin/redis-server ./lookup.conf 25 | fi 26 | -------------------------------------------------------------------------------- /lookup/shutdown_redis.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -e 4 | set -x 5 | 6 | ../../redis/src/redis-cli -s ./lookup.sock shutdown 7 | -------------------------------------------------------------------------------- /mypy.ini: -------------------------------------------------------------------------------- 1 | [mypy] 2 | 3 | ignore_errors = False 4 | 5 | show_error_context = True 6 | pretty = True 7 | exclude = cdnjs|client 8 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | name = "sanejs" 3 | version = "2.1" 4 | description = "Lookup service for known legitimate JavaScript." 
5 | authors = [{name="Raphaël Vinot", email="raphael.vinot@circl.lu"}] 6 | license = "BSD-3-Clause" 7 | requires-python = ">=3.12,<4.0" 8 | 9 | dynamic = [ "classifiers" ] 10 | 11 | dependencies = [ 12 | "gitpython (>=3.1.44)", 13 | "redis[hiredis] (>=5.2.1)", 14 | "flask (>=3.1.0)", 15 | "gunicorn (>=23.0.0)", 16 | "flask-restx (>=1.3.0)", 17 | "pysanejs (>=2.0.4)", 18 | "werkzeug (>=3.1.3)", 19 | "orjson (>=3.10.16)", 20 | ] 21 | 22 | [tool.poetry] 23 | classifiers=[ 24 | 'Development Status :: 3 - Alpha', 25 | 'Environment :: Console', 26 | 'Operating System :: POSIX :: Linux', 27 | 'Intended Audience :: Science/Research', 28 | 'Intended Audience :: Telecommunications Industry', 29 | 'Intended Audience :: Information Technology', 30 | 'Topic :: Security', 31 | 'Topic :: Internet', 32 | ] 33 | 34 | [project.scripts] 35 | start_website = "bin.start_website:main" 36 | start = "bin.start:main" 37 | run_backend = "bin.run_backend:main" 38 | build_hashes = "bin.build_hashes:main" 39 | shutdown = "bin.shutdown:main" 40 | stop = "bin.stop:main" 41 | update = "bin.update:main" 42 | 43 | [tool.poetry.group.dev.dependencies] 44 | mypy = "^1.15.0" 45 | types-redis = "^4.6.0.20241004" 46 | ipython = "^9.1.0" 47 | 48 | [build-system] 49 | requires = ["poetry-core>=2.0"] 50 | build-backend = "poetry.core.masonry.api" 51 | -------------------------------------------------------------------------------- /sanejs/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Lookyloo/sanejs/13cf4763231f9bc899fbf12565f3225a3a77def0/sanejs/__init__.py -------------------------------------------------------------------------------- /sanejs/default/__init__.py: -------------------------------------------------------------------------------- 1 | env_global_name: str = 'SANEJS_HOME' 2 | 3 | from .exceptions import SaneJSException # noqa 4 | 5 | # NOTE: the imports below are there to avoid too long paths when importing the 6 | # classes/methods in the rest of the project while keeping all that in a subdirectory 7 | # and allow to update them easily. 8 | # You should not have to change anything in this file below this line. 
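# For example, the rest of the codebase can import everything it needs with:
#   from sanejs.default import AbstractManager, get_homedir, get_config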
9 | 10 | from .abstractmanager import AbstractManager # noqa 11 | 12 | from .exceptions import MissingEnv, CreateDirectoryException, ConfigError # noqa 13 | 14 | from .helpers import get_homedir, load_configs, get_config, safe_create_dir, get_socket_path, try_make_file # noqa 15 | -------------------------------------------------------------------------------- /sanejs/default/abstractmanager.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import asyncio 4 | import logging 5 | import signal 6 | import time 7 | from abc import ABC 8 | from datetime import datetime, timedelta 9 | from subprocess import Popen 10 | from typing import List, Optional, Tuple 11 | 12 | from redis import Redis 13 | from redis.exceptions import ConnectionError 14 | 15 | from .helpers import get_socket_path 16 | 17 | 18 | class AbstractManager(ABC): 19 | 20 | script_name: str 21 | 22 | def __init__(self, loglevel: int=logging.DEBUG): 23 | self.loglevel = loglevel 24 | self.logger = logging.getLogger(f'{self.__class__.__name__}') 25 | self.logger.setLevel(loglevel) 26 | self.logger.info(f'Initializing {self.__class__.__name__}') 27 | self.process: Optional[Popen] = None 28 | self.__redis = Redis(unix_socket_path=get_socket_path('lookup'), db=1, decode_responses=True) 29 | 30 | @staticmethod 31 | def is_running() -> List[Tuple[str, float]]: 32 | try: 33 | r = Redis(unix_socket_path=get_socket_path('lookup'), db=1, decode_responses=True) 34 | return r.zrangebyscore('running', '-inf', '+inf', withscores=True) 35 | except ConnectionError: 36 | print('Unable to connect to redis, the system is down.') 37 | return [] 38 | 39 | @staticmethod 40 | def force_shutdown(): 41 | try: 42 | r = Redis(unix_socket_path=get_socket_path('lookup'), db=1, decode_responses=True) 43 | r.set('shutdown', 1) 44 | except ConnectionError: 45 | print('Unable to connect to redis, the system is down.') 46 | 47 | def set_running(self) -> None: 48 | self.__redis.zincrby('running', 1, self.script_name) 49 | 50 | def unset_running(self) -> None: 51 | current_running = self.__redis.zincrby('running', -1, self.script_name) 52 | if int(current_running) <= 0: 53 | self.__redis.zrem('running', self.script_name) 54 | 55 | def long_sleep(self, sleep_in_sec: int, shutdown_check: int=10) -> bool: 56 | if shutdown_check > sleep_in_sec: 57 | shutdown_check = sleep_in_sec 58 | sleep_until = datetime.now() + timedelta(seconds=sleep_in_sec) 59 | while sleep_until > datetime.now(): 60 | time.sleep(shutdown_check) 61 | if self.shutdown_requested(): 62 | return False 63 | return True 64 | 65 | async def long_sleep_async(self, sleep_in_sec: int, shutdown_check: int=10) -> bool: 66 | if shutdown_check > sleep_in_sec: 67 | shutdown_check = sleep_in_sec 68 | sleep_until = datetime.now() + timedelta(seconds=sleep_in_sec) 69 | while sleep_until > datetime.now(): 70 | await asyncio.sleep(shutdown_check) 71 | if self.shutdown_requested(): 72 | return False 73 | return True 74 | 75 | def shutdown_requested(self) -> bool: 76 | try: 77 | return True if self.__redis.exists('shutdown') else False 78 | except ConnectionRefusedError: 79 | return True 80 | except ConnectionError: 81 | return True 82 | 83 | def _to_run_forever(self) -> None: 84 | pass 85 | 86 | def _kill_process(self): 87 | if self.process is None: 88 | return 89 | kill_order = [signal.SIGWINCH, signal.SIGTERM, signal.SIGINT, signal.SIGKILL] 90 | for sig in kill_order: 91 | if self.process.poll() is None: 92 | self.logger.info(f'Sending {sig} to 
{self.process.pid}.') 93 | self.process.send_signal(sig) 94 | time.sleep(1) 95 | else: 96 | break 97 | else: 98 | self.logger.warning(f'Unable to kill {self.process.pid}, keep sending SIGKILL') 99 | while self.process.poll() is None: 100 | self.process.send_signal(signal.SIGKILL) 101 | time.sleep(1) 102 | 103 | def run(self, sleep_in_sec: int) -> None: 104 | self.logger.info(f'Launching {self.__class__.__name__}') 105 | try: 106 | while True: 107 | if self.shutdown_requested(): 108 | break 109 | try: 110 | if self.process: 111 | if self.process.poll() is not None: 112 | self.logger.critical(f'Unable to start {self.script_name}.') 113 | break 114 | else: 115 | self.set_running() 116 | self._to_run_forever() 117 | except Exception: 118 | self.logger.exception(f'Something went terribly wrong in {self.__class__.__name__}.') 119 | finally: 120 | if not self.process: 121 | # self.process means we run an external script, all the time, 122 | # do not unset between sleep. 123 | self.unset_running() 124 | if not self.long_sleep(sleep_in_sec): 125 | break 126 | except KeyboardInterrupt: 127 | self.logger.warning(f'{self.script_name} killed by user.') 128 | finally: 129 | if self.process: 130 | self._kill_process() 131 | try: 132 | self.unset_running() 133 | except Exception: 134 | # the services can already be down at that point. 135 | pass 136 | self.logger.info(f'Shutting down {self.__class__.__name__}') 137 | 138 | async def _to_run_forever_async(self) -> None: 139 | pass 140 | 141 | async def run_async(self, sleep_in_sec: int) -> None: 142 | self.logger.info(f'Launching {self.__class__.__name__}') 143 | try: 144 | while True: 145 | if self.shutdown_requested(): 146 | break 147 | try: 148 | if self.process: 149 | if self.process.poll() is not None: 150 | self.logger.critical(f'Unable to start {self.script_name}.') 151 | break 152 | else: 153 | self.set_running() 154 | await self._to_run_forever_async() 155 | except Exception: 156 | self.logger.exception(f'Something went terribly wrong in {self.__class__.__name__}.') 157 | finally: 158 | if not self.process: 159 | # self.process means we run an external script, all the time, 160 | # do not unset between sleep. 161 | self.unset_running() 162 | if not await self.long_sleep_async(sleep_in_sec): 163 | break 164 | except KeyboardInterrupt: 165 | self.logger.warning(f'{self.script_name} killed by user.') 166 | finally: 167 | if self.process: 168 | self._kill_process() 169 | try: 170 | self.unset_running() 171 | except Exception: 172 | # the services can already be down at that point. 173 | pass 174 | self.logger.info(f'Shutting down {self.__class__.__name__}') 175 | -------------------------------------------------------------------------------- /sanejs/default/exceptions.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | 4 | class SaneJSException(Exception): 5 | pass 6 | 7 | 8 | class MissingEnv(SaneJSException): 9 | pass 10 | 11 | 12 | class CreateDirectoryException(SaneJSException): 13 | pass 14 | 15 | 16 | class ConfigError(SaneJSException): 17 | pass 18 | -------------------------------------------------------------------------------- /sanejs/default/helpers.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import json 3 | import logging 4 | import os 5 | from functools import lru_cache 6 | from pathlib import Path 7 | from typing import Any, Dict, Optional, Union 8 | 9 | from . 
import env_global_name 10 | from .exceptions import ConfigError, CreateDirectoryException, MissingEnv 11 | 12 | configs: Dict[str, Dict[str, Any]] = {} 13 | logger = logging.getLogger('Helpers') 14 | 15 | 16 | @lru_cache(64) 17 | def get_homedir() -> Path: 18 | if not os.environ.get(env_global_name): 19 | # Try to open a .env file in the home directory if it exists. 20 | if (Path(__file__).resolve().parent.parent.parent / '.env').exists(): 21 | with (Path(__file__).resolve().parent.parent.parent / '.env').open() as f: 22 | for line in f: 23 | key, value = line.strip().split('=', 1) 24 | if value[0] in ['"', "'"]: 25 | value = value[1:-1] 26 | os.environ[key] = value 27 | 28 | if not os.environ.get(env_global_name): 29 | guessed_home = Path(__file__).resolve().parent.parent.parent 30 | raise MissingEnv(f"{env_global_name} is missing. \ 31 | Run the following command (assuming you run the code from the cloned repository):\ 32 | export {env_global_name}='{guessed_home}'") 33 | return Path(os.environ[env_global_name]) 34 | 35 | 36 | @lru_cache(64) 37 | def load_configs(path_to_config_files: Optional[Union[str, Path]]=None): 38 | global configs 39 | if configs: 40 | return 41 | if path_to_config_files: 42 | if isinstance(path_to_config_files, str): 43 | config_path = Path(path_to_config_files) 44 | else: 45 | config_path = path_to_config_files 46 | else: 47 | config_path = get_homedir() / 'config' 48 | if not config_path.exists(): 49 | raise ConfigError(f'Configuration directory {config_path} does not exist.') 50 | elif not config_path.is_dir(): 51 | raise ConfigError(f'Configuration directory {config_path} is not a directory.') 52 | 53 | configs = {} 54 | for path in config_path.glob('*.json'): 55 | with path.open() as _c: 56 | configs[path.stem] = json.load(_c) 57 | 58 | 59 | @lru_cache(64) 60 | def get_config(config_type: str, entry: Optional[str]=None, quiet: bool=False) -> Any: 61 | """Get an entry from the given config_type file.
Automatic fallback to the sample file""" 62 | global configs 63 | if not configs: 64 | load_configs() 65 | if config_type in configs: 66 | if entry: 67 | if entry in configs[config_type]: 68 | return configs[config_type][entry] 69 | else: 70 | if not quiet: 71 | logger.warning(f'Unable to find {entry} in config file.') 72 | else: 73 | return configs[config_type] 74 | else: 75 | if not quiet: 76 | logger.warning(f'No {config_type} config file available.') 77 | if not quiet: 78 | logger.warning(f'Falling back on sample config, please initialize the {config_type} config file.') 79 | with (get_homedir() / 'config' / f'{config_type}.json.sample').open() as _c: 80 | sample_config = json.load(_c) 81 | if entry: 82 | return sample_config[entry] 83 | return sample_config 84 | 85 | 86 | def safe_create_dir(to_create: Path) -> None: 87 | if to_create.exists() and not to_create.is_dir(): 88 | raise CreateDirectoryException(f'The path {to_create} already exists and is not a directory') 89 | to_create.mkdir(parents=True, exist_ok=True) 90 | 91 | 92 | def get_socket_path(name: str) -> str: 93 | mapping = { 94 | 'lookup': Path('lookup', 'lookup.sock'), 95 | } 96 | return str(get_homedir() / mapping[name]) 97 | 98 | 99 | def try_make_file(filename: Path): 100 | try: 101 | filename.touch(exist_ok=False) 102 | return True 103 | except FileExistsError: 104 | return False 105 | -------------------------------------------------------------------------------- /sanejs/query.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import logging 3 | from redis import Redis 4 | 5 | from .default import get_homedir, get_socket_path 6 | 7 | 8 | class Query(): 9 | 10 | def __init__(self, loglevel: int=logging.DEBUG) -> None: 11 | self.__init_logger(loglevel) 12 | self.libs_path = get_homedir() / 'cdnjs' / 'ajax' / 'libs' 13 | self.redis_lookup = Redis(unix_socket_path=get_socket_path('lookup'), decode_responses=True) 14 | 15 | def __init_logger(self, loglevel: int): 16 | self.logger = logging.getLogger(f'{self.__class__.__name__}') 17 | self.logger.setLevel(loglevel) 18 | 19 | @property 20 | def is_ready(self): 21 | return self.redis_lookup.get('ready') is not None 22 | 23 | def search_hash(self, sha512: str | list): 24 | if not self.is_ready: 25 | return {'error': 'The hashes are not all loaded yet, try again later.'} 26 | to_return: dict[str, list] = {'response': []} 27 | if isinstance(sha512, str): 28 | to_return['response'] = list(self.redis_lookup.smembers(sha512)) 29 | else: 30 | p = self.redis_lookup.pipeline() 31 | [p.smembers(s) for s in sha512] 32 | to_return['response'] = [list(r) for r in p.execute()] 33 | return to_return 34 | 35 | def search_lib(self, library: str | list, version: str | None=None): 36 | if not self.is_ready: 37 | return {'error': 'The hashes are not all loaded yet, try again later.'} 38 | to_return: dict[str, list | dict] = {'response': []} 39 | if isinstance(library, str): 40 | if version: 41 | to_return['response'] = {library: {version: self.redis_lookup.hgetall(f'{library}|{version}')}} 42 | else: 43 | p = self.redis_lookup.pipeline() 44 | versions = self.redis_lookup.smembers(library) 45 | [p.hgetall(f'{library}|{version}') for version in versions] 46 | to_return['response'] = {library: dict(zip(versions, p.execute()))} 47 | else: 48 | # version doesn't make sense here but if the string contains |{version}, we directly get that 49 | to_return_temp = [] 50 | for lib in library: 51 | if '|' in lib: 52 | libname, version = 
lib.split('|') 53 | to_return_temp.append({libname: {version: self.redis_lookup.hgetall(lib)}}) 54 | else: 55 | p = self.redis_lookup.pipeline() 56 | versions = self.redis_lookup.smembers(lib) 57 | [p.hgetall(f'{lib}|{version}') for version in versions] 58 | to_return_temp.append({lib: dict(zip(versions, p.execute()))}) 59 | 60 | to_return['response'] = to_return_temp 61 | return to_return 62 | -------------------------------------------------------------------------------- /sanejs/sanejs.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import logging 3 | import gzip 4 | import hashlib 5 | import orjson 6 | import os 7 | import time 8 | 9 | from redis import Redis 10 | from git import Repo # type: ignore 11 | 12 | from .default import get_homedir, get_socket_path 13 | 14 | """ 15 | sha: set of libname|version|fullpath 16 | 17 | libname|version: hash of fullpath -> sha 18 | """ 19 | 20 | # We assume the initialisation of the submodule is done before calling this class. 21 | 22 | 23 | class SaneJS(): 24 | 25 | def __init__(self, loglevel: int=logging.DEBUG) -> None: 26 | self.__init_logger(loglevel) 27 | self.libs_path = get_homedir() / 'cdnjs' / 'ajax' / 'libs' 28 | self.redis_lookup = Redis(unix_socket_path=get_socket_path('lookup'), decode_responses=True) 29 | self.cdnjs_repo = Repo(str(get_homedir() / 'cdnjs')) 30 | 31 | def __init_logger(self, loglevel: int): 32 | self.logger = logging.getLogger(f'{self.__class__.__name__}') 33 | self.logger.setLevel(loglevel) 34 | 35 | def _pull_dnsjs(self): 36 | last_commit_ts = self.redis_lookup.get('last_commit') 37 | if not last_commit_ts or int(last_commit_ts) < time.time() - 10000: 38 | self.cdnjs_repo.remote('origin').pull() 39 | return True 40 | return False 41 | 42 | def compute_hashes(self, force_recache: bool=False, force_rehash: bool=False) -> None: 43 | '''Compute the hashes for the (new) files, create a file in the root directory of each library''' 44 | if not self._pull_dnsjs(): 45 | return 46 | if force_recache: 47 | self.logger.info('Force re-cache everything.') 48 | self.redis_lookup.flushdb() 49 | self.logger.info('Loading hashes...') 50 | counter = 0 51 | for libname in self.libs_path.iterdir(): 52 | # libname is the path to the library, it contains a directory for each version 53 | if not libname.is_dir(): 54 | continue 55 | if counter % 100 == 0: 56 | self.logger.info(f'Loaded {counter} librairies...') 57 | counter += 1 58 | got_new_versions = False 59 | libname_hashes = libname / 'hashes.json.gz' 60 | if libname_hashes.exists(): 61 | try: 62 | with gzip.open(libname_hashes, 'rb') as f: 63 | # We have the hashes, we can skip this library 64 | if _content := f.read(): 65 | all_hashes_lib = orjson.loads(_content) 66 | else: 67 | # force rewriting the file. 68 | got_new_versions = True 69 | all_hashes_lib = {} 70 | except Exception as e: 71 | self.logger.warning(f'Unable to process hashes for {libname}: {e}') 72 | libname_hashes.unlink() 73 | self.logger.debug(f'Processing {libname.name}.') 74 | for version in libname.iterdir(): 75 | # This is the directory for a version of the library. 
It can contain all kind of directories and files 76 | if not version.is_dir(): 77 | if version.name not in ['package.json', 'hashes.json.gz', '.donotoptimizepng']: 78 | # packages.json is expected, and we don't care 79 | self.logger.warning(f'That is it Oo -> {version}.') 80 | continue 81 | 82 | if (libname.name in all_hashes_lib 83 | and version.name in all_hashes_lib[libname.name] 84 | and not force_rehash 85 | and not force_recache): 86 | # This version was already loaded 87 | # Unless we rehash or recache, we can skip it 88 | continue 89 | 90 | version_hashes_path = version / 'hashes.json.gz' 91 | if (version_hashes_path.exists() 92 | and os.path.getsize(version_hashes_path) 93 | and not force_rehash): 94 | # We have the hashes, we can skip this version 95 | try: 96 | with gzip.open(version_hashes_path, 'rb') as f: 97 | to_save = orjson.loads(f.read()) 98 | except Exception as e: 99 | self.logger.warning(f'Unable to process hashes for {version}: {e}') 100 | version_hashes_path.unlink() 101 | force_recache = True 102 | if force_recache: 103 | # Only re-cache the hashes if requested. 104 | p = self.redis_lookup.pipeline() 105 | for filepath, f_hash in to_save.items(): 106 | p.sadd(f_hash['newline'], f'{libname.name}|{version.name}|{filepath}') 107 | p.sadd(f_hash['no_newline'], f'{libname.name}|{version.name}|{filepath}') 108 | p.hset(f'{libname.name}|{version.name}', filepath, f_hash['default']) 109 | p.sadd(libname.name, version.name) 110 | p.execute() 111 | else: 112 | # We need to compute the hashes 113 | got_new_versions = True 114 | self.logger.info(f'Got new version for {libname.name}: {version.name}.') 115 | to_save = {} 116 | p = self.redis_lookup.pipeline() 117 | for to_hash in version.glob('**/*'): 118 | if not to_hash.is_file() or to_hash.name == 'hashes.json.gz': 119 | continue 120 | # The file may or may not have a new line at the end. 121 | # The files we want to check against may or may not have the new line at the end. 122 | # We will compute both hashes. 
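# Illustration with a made-up file content: for b'alert(1);\n' both
# sha512(b'alert(1);\n') and sha512(b'alert(1);') end up indexed below, so a
# lookup matches whether or not the submitted hash includes the trailing
# newline.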
123 | with to_hash.open('rb') as f_to_h: 124 | content = f_to_h.read() 125 | file_hash_default = hashlib.sha512(content) 126 | if content: 127 | if content[-1:] == b'\n': 128 | # has newline 129 | file_hash_newline = hashlib.sha512(content) 130 | file_hash_no_newline = hashlib.sha512(content[:-1]) 131 | else: 132 | # Doesn't have newline 133 | file_hash_no_newline = hashlib.sha512(content) 134 | file_hash_newline = hashlib.sha512(content + b'\n') 135 | else: 136 | # Empty file 137 | file_hash_newline = file_hash_default 138 | file_hash_no_newline = file_hash_default 139 | filepath = to_hash.as_posix().replace(version.as_posix() + '/', '') 140 | to_save[filepath] = {'newline': file_hash_newline.hexdigest(), 'no_newline': file_hash_no_newline.hexdigest(), 'default': file_hash_default.hexdigest()} 141 | p.sadd(file_hash_newline.hexdigest(), f'{libname.name}|{version.name}|{filepath}') 142 | p.sadd(file_hash_no_newline.hexdigest(), f'{libname.name}|{version.name}|{filepath}') 143 | p.hset(f'{libname.name}|{version.name}', filepath, file_hash_default.hexdigest()) 144 | p.sadd(libname.name, version.name) 145 | p.execute() 146 | with gzip.open(version / 'hashes.json.gz', 'wb') as f: 147 | # Save the hashes in the directory (aka cache it) 148 | f.write(orjson.dumps(to_save)) 149 | all_hashes_lib[version.name] = to_save 150 | if got_new_versions: 151 | with gzip.open(libname / 'hashes.json.gz', 'wb') as f: 152 | # Write a file with all the hashes for all the versions at the root directory of the library 153 | f.write(orjson.dumps(all_hashes_lib)) 154 | self.redis_lookup.sadd('all_libraries', libname.name) 155 | self.redis_lookup.set('ready', 1) 156 | self.logger.info('... done loading hashes.') 157 | -------------------------------------------------------------------------------- /tools/validate_config_files.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import json 4 | import logging 5 | import argparse 6 | 7 | from sanejs.default import get_homedir 8 | 9 | 10 | def validate_generic_config_file(): 11 | sample_config = get_homedir() / 'config' / 'generic.json.sample' 12 | with sample_config.open() as f: 13 | generic_config_sample = json.load(f) 14 | # Check documentation 15 | for key in generic_config_sample.keys(): 16 | if key == '_notes': 17 | continue 18 | if key not in generic_config_sample['_notes']: 19 | raise Exception(f'###### - Documentation missing for {key}') 20 | 21 | user_config = get_homedir() / 'config' / 'generic.json' 22 | if not user_config.exists(): 23 | # The config file was never created, copy the sample. 24 | with user_config.open('w') as _fw: 25 | json.dump(generic_config_sample, _fw) 26 | 27 | with user_config.open() as f: 28 | generic_config = json.load(f) 29 | 30 | # Check all entries in the sample files are in the user file, and they have the same type 31 | for key in generic_config_sample.keys(): 32 | if key == '_notes': 33 | continue 34 | if generic_config.get(key) is None: 35 | logger.warning(f'Entry missing in user config file: {key}. Will default to: {generic_config_sample[key]}') 36 | continue 37 | if not isinstance(generic_config[key], type(generic_config_sample[key])): 38 | raise Exception(f'Invalid type for {key}.
39 | 
40 |         if isinstance(generic_config[key], dict):
41 |             # Check entries
42 |             for sub_key in generic_config_sample[key].keys():
43 |                 if sub_key not in generic_config[key]:
44 |                     raise Exception(f'{sub_key} is missing in {key} in the user config file. Default from sample file: {generic_config_sample[key][sub_key]}')
45 |                 if not isinstance(generic_config[key][sub_key], type(generic_config_sample[key][sub_key])):
46 |                     raise Exception(f'Invalid type for {sub_key} in {key}. Got: {type(generic_config[key][sub_key])} ({generic_config[key][sub_key]}), expected: {type(generic_config_sample[key][sub_key])} ({generic_config_sample[key][sub_key]})')
47 | 
48 |     # Make sure the user config file doesn't have entries missing in the sample config
49 |     for key in generic_config.keys():
50 |         if key not in generic_config_sample:
51 |             raise Exception(f'{key} is missing in the sample config file. You need to compare {user_config} with {sample_config}.')
52 | 
53 |     return True
54 | 
55 | 
56 | def update_user_configs():
57 |     for file_name in ['generic']:
58 |         with (get_homedir() / 'config' / f'{file_name}.json').open() as f:
59 |             try:
60 |                 generic_config = json.load(f)
61 |             except Exception:
62 |                 generic_config = {}
63 |         with (get_homedir() / 'config' / f'{file_name}.json.sample').open() as f:
64 |             generic_config_sample = json.load(f)
65 | 
66 |         has_new_entry = False
67 |         for key in generic_config_sample.keys():
68 |             if key == '_notes':
69 |                 continue
70 |             if generic_config.get(key) is None:
71 |                 print(f'{key} was missing in {file_name}, adding it.')
72 |                 print(f"Description: {generic_config_sample['_notes'][key]}")
73 |                 generic_config[key] = generic_config_sample[key]
74 |                 has_new_entry = True
75 |             elif isinstance(generic_config[key], dict):
76 |                 for sub_key in generic_config_sample[key].keys():
77 |                     if sub_key not in generic_config[key]:
78 |                         print(f'{sub_key} was missing in {key} from {file_name}, adding it.')
79 |                         generic_config[key][sub_key] = generic_config_sample[key][sub_key]
80 |                         has_new_entry = True
81 |         if has_new_entry:
82 |             with (get_homedir() / 'config' / f'{file_name}.json').open('w') as fw:
83 |                 json.dump(generic_config, fw, indent=2, sort_keys=True)
84 |     return has_new_entry
85 | 
86 | 
87 | if __name__ == '__main__':
88 |     logger = logging.getLogger('Config validator')
89 |     parser = argparse.ArgumentParser(description='Check the config files.')
90 |     parser.add_argument('--check', default=False, action='store_true', help='Check if the sample config and the user config are consistent')
91 |     parser.add_argument('--update', default=False, action='store_true', help='Update the user config with the entries from the sample config if entries are missing')
92 |     args = parser.parse_args()
93 | 
94 |     if args.check:
95 |         if validate_generic_config_file():
96 |             print(f"The entries in {get_homedir() / 'config' / 'generic.json'} are valid.")
97 | 
98 |     if args.update:
99 |         if not update_user_configs():
100 |             print(f"No updates needed in {get_homedir() / 'config' / 'generic.json'}.")
101 | 
--------------------------------------------------------------------------------
/website/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Lookyloo/sanejs/13cf4763231f9bc899fbf12565f3225a3a77def0/website/__init__.py
--------------------------------------------------------------------------------
/website/logs/.keepdir:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Lookyloo/sanejs/13cf4763231f9bc899fbf12565f3225a3a77def0/website/logs/.keepdir
--------------------------------------------------------------------------------
/website/web/__init__.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | 
3 | import logging
4 | import logging.config
5 | 
6 | from importlib.metadata import version
7 | 
8 | from flask import Flask, request
9 | from flask_restx import Api, Resource, fields  # type: ignore
10 | 
11 | from sanejs.default import get_config
12 | from sanejs.query import Query
13 | 
14 | from .proxied import ReverseProxied
15 | 
16 | logging.config.dictConfig(get_config('logging'))
17 | 
18 | app = Flask(__name__)
19 | 
20 | app.wsgi_app = ReverseProxied(app.wsgi_app)  # type: ignore
21 | 
22 | api = Api(app, title='SaneJS API',
23 |           description='API to query a SaneJS instance.',
24 |           version=version('sanejs'))
25 | 
26 | q = Query()
27 | 
28 | 
29 | sha512_fields = api.model('SHA512Fields', {
30 |     'sha512': fields.String(description="The SHA512 to search", required=True)
31 | })
32 | 
33 | 
34 | library_fields = api.model('LibraryFields', {
35 |     'library': fields.String(description="The library to search", required=True)
36 | })
37 | 
38 | 
39 | @api.route('/sha512')
40 | @api.doc(description='Get the entries related to this hash (sha512)')
41 | class SHA512(Resource):
42 | 
43 |     @api.param('sha512', 'The hash to check', required=True)
44 |     def get(self):
45 |         if 'sha512' not in request.args or not request.args.get('sha512'):
46 |             return {'error': 'The hash is required...'}, 400
47 |         return q.search_hash(request.args['sha512'])
48 | 
49 |     @api.doc(body=sha512_fields)
50 |     def post(self):
51 |         try:
52 |             req_data = request.get_json(force=True)
53 |         except Exception as e:
54 |             return {'error': str(e)}
55 | 
56 |         if not req_data.get('sha512'):
57 |             return {'error': 'The key "sha512" is required.'}
58 |         return q.search_hash(req_data['sha512'])
59 | 
60 | 
61 | @api.route('/library')
62 | @api.doc(description='Get the entries related to this library')
63 | class Library(Resource):
64 | 
65 |     @api.param('library', 'The library name to check', required=True)
66 |     def get(self):
67 |         if 'library' not in request.args or not request.args.get('library'):
68 |             return {'error': 'The library is required...'}, 400
69 |         return q.search_lib(request.args['library'])
70 | 
71 |     @api.doc(body=library_fields)
72 |     def post(self):
73 |         try:
74 |             req_data = request.get_json(force=True)
75 |         except Exception as e:
76 |             return {'error': str(e)}
77 | 
78 |         if not req_data.get('library'):
79 |             return {'error': 'The key "library" is required.'}
80 | 
81 |         if 'version' in req_data:
82 |             return q.search_lib(req_data['library'], req_data['version'])
83 |         else:
84 |             return q.search_lib(req_data['library'])
85 | 
--------------------------------------------------------------------------------
/website/web/proxied.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | from typing import Any, MutableMapping
3 | 
4 | 
5 | class ReverseProxied():
6 |     def __init__(self, app: Any) -> None:
7 |         self.app = app
8 | 
9 |     def __call__(self, environ: MutableMapping[str, Any], start_response: Any) -> Any:
10 |         scheme = environ.get('HTTP_X_FORWARDED_PROTO')
11 |         if not scheme:
12 |             scheme = environ.get('HTTP_X_SCHEME')
13 | 
14 |         if scheme:
15 |             environ['wsgi.url_scheme'] = scheme
16 |         return self.app(environ, start_response)
17 | 
--------------------------------------------------------------------------------
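For reference, a minimal client-side sketch of how the pieces above fit together: the loader indexes every cdnjs file under two SHA-512 digests (with and without a trailing newline) plus a default digest, and website/web/__init__.py exposes that index through the /sha512 and /library endpoints. The script below is not part of the repository; the base URL and port are assumptions for a local instance, and it relies on the third-party `requests` package.

```python
#!/usr/bin/env python3
"""Sketch: ask a SaneJS instance about a local JavaScript file.

Assumptions: a SaneJS instance is reachable at SANEJS_URL (hypothetical
local address/port) and the `requests` package is installed.
"""

from __future__ import annotations

import hashlib
import sys
from pathlib import Path

import requests

SANEJS_URL = 'http://127.0.0.1:5007'  # assumption: point this at your instance


def sha512_variants(path: Path) -> set[str]:
    """SHA-512 of the file with and without a trailing newline,
    mirroring how the loader indexes every cdnjs file."""
    content = path.read_bytes()
    if content.endswith(b'\n'):
        variants = {content, content[:-1]}
    else:
        variants = {content, content + b'\n'}
    return {hashlib.sha512(v).hexdigest() for v in variants}


def lookup(path: Path) -> None:
    for digest in sha512_variants(path):
        # GET /sha512?sha512=<hash>, as served by the SHA512 resource.
        r = requests.get(f'{SANEJS_URL}/sha512', params={'sha512': digest})
        r.raise_for_status()
        print(digest[:16], '->', r.json())

    # POST /library with a JSON body, as accepted by the Library resource.
    r = requests.post(f'{SANEJS_URL}/library', json={'library': 'jquery'})
    r.raise_for_status()
    print('jquery:', r.json())


if __name__ == '__main__':
    lookup(Path(sys.argv[1]))
```

Querying both digests matters because cdnjs and the file being checked may disagree on the trailing newline; the loader stores both variants in Redis precisely so that either lookup hits.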