├── .gitignore ├── .gitmodules ├── LICENSE ├── README.org ├── bin └── build.sh ├── doc ├── img │ ├── cli-33E400e1.png │ └── en_square_cef_logo.png ├── usage.html ├── usage.org └── usage.pdf ├── pastelyzer.asd ├── public ├── dash.html ├── pastelyzer.css └── pastelyzer.js ├── src ├── circl-paste.lisp ├── circl-zmq.lisp ├── cli.lisp ├── compat.lisp ├── config │ ├── cmd.lisp │ ├── context.lisp │ ├── filter.lisp │ ├── loader.lisp │ ├── package.lisp │ ├── sets.lisp │ ├── sink.lisp │ ├── smtp.lisp │ └── util.lisp ├── db.lisp ├── deflate.lisp ├── fmt.lisp ├── job.lisp ├── json-api.lisp ├── log.lisp ├── modules │ └── misp.lisp ├── package.lisp ├── pastelyzer.lisp ├── processing.lisp ├── rest.lisp ├── server.lisp ├── sys.lisp ├── util.lisp └── vars.lisp ├── support └── pastelyzer.service └── tests ├── circl-paste.lisp ├── config ├── filter.lisp └── sets.lisp ├── package.lisp ├── processing.lisp ├── suites.lisp └── util.lisp /.gitignore: -------------------------------------------------------------------------------- 1 | bin/pastelyzer* 2 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "deps/Postmodern"] 2 | path = deps/postmodern 3 | url = https://github.com/jdz/Postmodern.git 4 | [submodule "deps/cl-ppcre"] 5 | path = deps/cl-ppcre 6 | url = https://github.com/jdz/cl-ppcre.git 7 | [submodule "deps/cl-log"] 8 | path = deps/cl-log 9 | url = https://github.com/jdz/cl-log.git 10 | [submodule "deps/cl-speedy-queue"] 11 | path = deps/cl-speedy-queue 12 | url = https://github.com/jdz/cl-speedy-queue.git 13 | [submodule "deps/ip"] 14 | path = deps/ip 15 | url = https://github.com/jdz/ip.git 16 | [submodule "deps/2am"] 17 | path = deps/2am 18 | url = https://gitlab.common-lisp.net/dkochmanski/2am.git 19 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2018-2020 CERT.LV 2 | 3 | This software is provided 'as-is', without any express or implied 4 | warranty. In no event will the authors be held liable for any damages 5 | arising from the use of this software. 6 | 7 | Permission is granted to anyone to use this software for any purpose, 8 | including commercial applications, and to alter it and redistribute it 9 | freely, subject to the following restrictions: 10 | 11 | 1. The origin of this software must not be misrepresented; you must not 12 | claim that you wrote the original software. If you use this software 13 | in a product, an acknowledgment in the product documentation would be 14 | appreciated but is not required. 15 | 2. Altered source versions must be plainly marked as such, and must not be 16 | misrepresented as being the original software. 17 | 3. This notice may not be removed or altered from any source distribution. 18 | -------------------------------------------------------------------------------- /README.org: -------------------------------------------------------------------------------- 1 | #+TITLE: Pastelyzer 2 | 3 | * Introduction 4 | 5 | This is ~pastelyzer~: the paste analyzer. The user manual can be found in 6 | [[file:doc]] directory. 7 | 8 | #+ATTR_HTML: :width 95% 9 | #+ATTR_LATEX: :width 0.95\textwidth :float nil 10 | [[file:doc/img/cli-33E400e1.png]] 11 | 12 | The easiest way to run ~pastelyzer~ is by using the provided binary. 13 | 14 | * Runtime dependencies (standalone mode only) 15 | ** Libraries 16 | 17 | The following libraries must be installed on the target system: 18 | 19 | - ~libssl~ 20 | - ~libzmq~ (version 3+) 21 | 22 | *** Ubuntu Bionic (18.04), Eoan (19.10) packages 23 | 24 | - ~libssl1.1~ 25 | - ~libzmq5~ 26 | 27 | ** Database 28 | 29 | PostgreSQL database (at least version *9.5*) is required. Necessary tables 30 | will be created automatically. Database access parameters can be controlled 31 | using environment variables ~DB_HOST~, ~DB_PORT~, ~DB_NAME~, ~DB_USER~ and 32 | ~DB_PASS~. See user guide, section "Environment variables." 33 | 34 | * Compilation 35 | ** Set up a working Common Lisp environment 36 | 37 | ~pastelyzer~ works with SBCL and ClozureCL: 38 | 39 | - http://sbcl.org/ 40 | - https://ccl.clozure.com/ 41 | 42 | We strongly suggest using SBCL (which is also the implementation used in the 43 | examples below). Most Linux distributions already have an ~sbcl~ package 44 | available. 45 | 46 | ** Install system packages 47 | 48 | To follow the instructions below you'll need the following software: 49 | 50 | - C compiler (~gcc~). 51 | - ~curl~ (to download Quicklisp). 52 | - ~git~ (to clone ~pastelyzer~). 53 | - Header files for ~libssl~ and ~libzmq~. 54 | 55 | If you're using Ubuntu, you can install all of the above using the following 56 | command: 57 | 58 | #+BEGIN_EXAMPLE 59 | # apt install build-essential curl git libssl-dev libzmq3-dev sbcl 60 | #+END_EXAMPLE 61 | 62 | ** Install Quicklisp 63 | 64 | An abridged version of the official 65 | [[https://www.quicklisp.org/beta/#installation][Quicklisp install 66 | instructions]]: 67 | 68 | #+BEGIN_EXAMPLE 69 | $ curl -O https://beta.quicklisp.org/quicklisp.lisp 70 | $ sha256sum quicklisp.lisp 71 | 4a7a5c2aebe0716417047854267397e24a44d0cce096127411e9ce9ccfeb2c17 quicklisp.lisp 72 | $ sbcl --load quicklisp.lisp \ 73 | --eval '(quicklisp-quickstart:install)' \ 74 | --quit 75 | #+END_EXAMPLE 76 | 77 | ** Clone the source repository 78 | 79 | #+BEGIN_EXAMPLE 80 | $ git clone --recurse-submodules https://github.com/cert-lv/pastelyzer.git 81 | $ cd pastelyzer 82 | #+END_EXAMPLE 83 | 84 | ** Install dependencies 85 | 86 | This has to be done only once (whenever the lisp library dependencies 87 | change). In the ~pastelyzer~ directory run the ~bin/build.sh~ script with 88 | ~load-deps~ command: 89 | 90 | #+BEGIN_EXAMPLE 91 | $ bin/build.sh load-deps 92 | #+END_EXAMPLE 93 | 94 | ** Run the test suite (optional) 95 | 96 | #+BEGIN_EXAMPLE 97 | $ bin/build.sh test 98 | #+END_EXAMPLE 99 | 100 | ** Build an executable 101 | 102 | Run ~bin/build.sh~ script with no parameters in ~pastelyzer~ directory: 103 | 104 | #+BEGIN_EXAMPLE 105 | $ bin/build.sh 106 | #+END_EXAMPLE 107 | 108 | * Copyright and License 109 | 110 | ~pastelyzer~ is released under the terms of zlib/libpng license. See 111 | [[file:LICENSE]] file. 112 | 113 | * Acknowledgements 114 | 115 | This work was partially funded by CEF (Connecting Europe Facility) funding 116 | under "Improving Cyber Security Capacities in Latvia" 117 | (INEA/CEF/ICT/A2017/1528784). 118 | 119 | #+ATTR_HTML: :height 100px :align center 120 | [[file:doc/img/en_square_cef_logo.png]] 121 | -------------------------------------------------------------------------------- /bin/build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | # 3 | # This script may look ugly, but that's because it is POSIX Shell 4 | # Command Language compliant: 5 | # 6 | # http://pubs.opengroup.org/onlinepubs/9699919799/ 7 | # 8 | # The main issue is the lack of arrays, so the only way to build a 9 | # command-line without issues with whitespace is to use the `set' 10 | # built-in. 11 | 12 | set -e 13 | 14 | SCRIPT="$0" 15 | SCRIPT_NAME=$(basename "$0") 16 | BINDIR=$(dirname "$0") 17 | ROOT=$(readlink -en "${BINDIR}/../") 18 | BUILD_ID="nil" 19 | 20 | usage() { 21 | echo "Usage: $SCRIPT_NAME {operation|implementation}*" 22 | echo 23 | echo " operation: build|test (default: build)" 24 | echo " implementation: sbcl|ccl (default: sbcl)" 25 | } 26 | 27 | calculate_build_id() { 28 | local rel_tag 29 | local n_master 30 | local n_branch 31 | local hash 32 | local release 33 | local build_id 34 | local git="git -C ${ROOT}" 35 | 36 | rel_tag=$($git describe --tags --match="rel-*" --abbrev=0 master 2> /dev/null)\ 37 | || { 38 | echo "No release tag found, cannot calculate build id." >&2 39 | return 40 | } 41 | 42 | n_master=$($git rev-list master --not "${rel_tag}" --count) 43 | n_branch=$($git rev-list HEAD --not master --count) 44 | release="${rel_tag#rel-}" 45 | build_id="${release}" 46 | 47 | if [ ! "$n_master" -eq 0 ]; then 48 | build_id="${release}.${n_master}" 49 | fi 50 | 51 | hash=$($git rev-parse --short HEAD) 52 | 53 | if [ $n_branch -eq 0 ]; then 54 | build_id="${build_id}-${hash}" 55 | else 56 | branch=$($git rev-parse --abbrev-ref HEAD) 57 | build_id="${build_id}.${branch}.${n_branch}-${hash}" 58 | fi 59 | 60 | BUILD_ID="\"${build_id}\"" 61 | } 62 | 63 | OPERATION="build" 64 | LISP="sbcl" 65 | 66 | while [ 0 -lt $# ] ; do 67 | case "$1" in 68 | sbcl) 69 | LISP=sbcl 70 | ;; 71 | ccl) 72 | LISP=ccl 73 | ;; 74 | test) 75 | OPERATION=test 76 | ;; 77 | load-deps) 78 | OPERATION=load-deps 79 | ;; 80 | --help|-h) 81 | usage 82 | exit 0 83 | ;; 84 | *) 85 | echo "$SCRIPT: Unrecognized option '$1'." 86 | echo "Try '$SCRIPT --help' for more information." 87 | exit 1 88 | ;; 89 | esac 90 | shift 91 | done 92 | 93 | # Implementation-specific options. 94 | case $LISP in 95 | sbcl) 96 | set -- \ 97 | --dynamic-space-size 1024 \ 98 | --no-sysinit \ 99 | --no-userinit \ 100 | --eval "(sb-ext:disable-debugger)" \ 101 | --eval "(sb-ext:restrict-compiler-policy 'safety 1)" \ 102 | --eval "(sb-ext:restrict-compiler-policy 'debug 1)" 103 | ;; 104 | ccl) 105 | set -- --batch --no-init 106 | ;; 107 | esac 108 | 109 | # Common options. 110 | set -- "$@" \ 111 | --eval "(push :hunchentoot-no-ssl *features*)" \ 112 | --eval "(require :asdf)" 113 | 114 | # Build options. 115 | case "$OPERATION" in 116 | test) 117 | case $LISP in 118 | sbcl) 119 | set -- "$@" \ 120 | --eval "(proclaim '(optimize (debug 3) (safety 3)))" \ 121 | --eval "(asdf:test-system \"pastelyzer\")" \ 122 | --eval "(sb-ext:exit :code (if 2am::*fail-count* 1 0))" 123 | ;; 124 | ccl) 125 | set -- "$@" \ 126 | --eval "(proclaim '(optimize (debug 3) (safety 3)))" \ 127 | --eval "(asdf:test-system \"pastelyzer\")" \ 128 | --eval "(ccl:quit (if 2am::*fail-count* 1 0))" 129 | ;; 130 | esac 131 | ;; 132 | load-deps) 133 | set -- "$@" \ 134 | --load ~/quicklisp/setup.lisp \ 135 | --eval "(ql:quickload :swank)" \ 136 | --eval "(push '#:swank-indentation swank-loader::*contribs*)" \ 137 | --eval "(swank-loader:init :load-contribs t :setup nil :reload t)" \ 138 | --eval "(ql:quickload \"pastelyzer\")" \ 139 | --quit 140 | ;; 141 | *) 142 | calculate_build_id 143 | set -- "$@" \ 144 | --eval "(asdf:load-system :swank)" \ 145 | --eval "(push '#:swank-indentation swank-loader::*contribs*)" \ 146 | --eval "(swank-loader:init :load-contribs t :setup nil :reload t)" \ 147 | --eval "(asdf:load-system \"pastelyzer\")" \ 148 | --eval "(setq pastelyzer::*build-id* ${BUILD_ID})" \ 149 | --eval "(proclaim '(optimize (speed 2) (safety 1)))" \ 150 | --eval "(asdf:load-system \"pastelyzer\")" \ 151 | --eval "(cffi:close-foreign-library 'pzmq::libzmq)" \ 152 | --eval "(cffi:close-foreign-library 'cl+ssl::libssl)" \ 153 | --eval "(cffi:close-foreign-library 'cl+ssl::libcrypto)" \ 154 | --eval "(asdf:make \"pastelyzer\")" 155 | if [ -f "${BINDIR}/pastelyzer" ]; then 156 | mv -f "${BINDIR}/pastelyzer" "${BINDIR}/pastelyzer.old" 157 | fi 158 | ;; 159 | esac 160 | 161 | export CL_SOURCE_REGISTRY="${ROOT}/:${ROOT}/deps//:~/quicklisp/dists/quicklisp/software//" 162 | 163 | "$LISP" "$@" 164 | -------------------------------------------------------------------------------- /doc/img/cli-33E400e1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cert-lv/pastelyzer/278caeaafd5073300b5ee5bc9b801be5bd4f8637/doc/img/cli-33E400e1.png -------------------------------------------------------------------------------- /doc/img/en_square_cef_logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cert-lv/pastelyzer/278caeaafd5073300b5ee5bc9b801be5bd4f8637/doc/img/en_square_cef_logo.png -------------------------------------------------------------------------------- /doc/usage.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cert-lv/pastelyzer/278caeaafd5073300b5ee5bc9b801be5bd4f8637/doc/usage.pdf -------------------------------------------------------------------------------- /pastelyzer.asd: -------------------------------------------------------------------------------- 1 | (defsystem "pastelyzer" 2 | :description "Leak analyser" 3 | :author "Jānis Džeriņš " 4 | :version (:read-file-form "src/vars.lisp" :at (1 2)) 5 | :license "Zlib" 6 | :long-description "Monitor data posted on the interwebs -- mainly 7 | (but not only) on paste sites." 8 | :build-operation program-op 9 | :build-pathname "bin/pastelyzer" 10 | :entry-point "pastelyzer:run-standalone" 11 | :depends-on ("alexandria" 12 | "cl-base64" 13 | "cl-log" 14 | "cl-postgres" 15 | "cl-postgres+local-time" 16 | "cl-ppcre" 17 | "cl-smtp" 18 | "cl-speedy-queue" 19 | "cl-who" 20 | "drakma" 21 | "flexi-streams" 22 | "hunchentoot" 23 | "hunchensocket" 24 | "ip" 25 | "jsown" 26 | "postmodern" 27 | "pzmq" 28 | "split-sequence" 29 | "string-case" 30 | (:feature (:not :swank) "swank") 31 | (:feature (:not (:or :sbcl :ccl)) "trivial-utf-8") 32 | (:feature :sbcl (:require "sb-concurrency")) 33 | (:feature :sbcl "chipz") 34 | (:feature (:not :sbcl) "chanl")) 35 | :components ((:module "src" 36 | :serial t 37 | :components ((:file "package") 38 | (:file "vars") 39 | (:file "sys") 40 | (:file "compat") 41 | (:file "util") 42 | (:file "fmt") 43 | (:file "log") 44 | (:file "deflate" 45 | :if-feature (:and (:not :sbcl) (:not :quicklisp))) 46 | (:file "db") 47 | (:file "circl-zmq") 48 | (:file "circl-paste") 49 | (:file "job") 50 | (:file "processing") 51 | (:module "config" 52 | :serial t 53 | :components ((:file "package") 54 | (:file "sink") 55 | (:file "filter") 56 | (:file "context") 57 | (:file "loader") 58 | (:file "util") 59 | (:file "sets") 60 | (:file "smtp") 61 | (:file "cmd" :if-feature :sbcl))) 62 | (:file "server") 63 | (:file "json-api") 64 | (:file "rest") 65 | (:module "modules" 66 | :components ((:file "misp"))) 67 | (:file "cli") 68 | (:file "pastelyzer")))) 69 | :in-order-to ((test-op (test-op "pastelyzer/tests")))) 70 | 71 | (defsystem "pastelyzer/tests" 72 | :depends-on ("alexandria" 73 | "2am" 74 | "pastelyzer") 75 | :pathname "tests/" 76 | :components ((:file "package") 77 | (:file "processing" :depends-on ("package")) 78 | (:file "util" :depends-on ("package")) 79 | (:file "circl-paste" :depends-on ("package")) 80 | (:module "config" 81 | :depends-on ("package") 82 | :components ((:file "filter") 83 | (:file "sets"))) 84 | (:file "suites" :depends-on ("package" "config"))) 85 | :perform (test-op (o c) 86 | (symbol-call '#:2am '#:run 't))) 87 | -------------------------------------------------------------------------------- /public/dash.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | Pastelyzer 4 | 5 | 6 | 7 | 8 |
9 | 10 | -------------------------------------------------------------------------------- /public/pastelyzer.css: -------------------------------------------------------------------------------- 1 | li.artefact-group { list-style-type: none; } 2 | li.artefact { list-style-type: none; } 3 | 4 | /* Colors borrowed (mostly) from Zenburn. */ 5 | html { 6 | scrollbar-width: thin; 7 | scrollbar-color: #336633 #3F3F3F; 8 | } 9 | 10 | body { 11 | color: #DCDCCC; 12 | background-color: #3F3F3F; 13 | font-family: sans-serif; 14 | } 15 | span.description { 16 | display: block; 17 | color: #CCEECC; 18 | background-color: #336633; 19 | padding: 0.3em; 20 | border-radius: 0.3em; 21 | } 22 | span.description a { 23 | color: #F0DFAF; 24 | text-decoration: none; 25 | } 26 | span.description a:visited { 27 | color: #909090; 28 | } 29 | .artefact { 30 | font-family: monospace; 31 | } 32 | .artefact .source-string { 33 | color: #DFAF8F; 34 | font-weight: bold; 35 | } 36 | .artefact.important .source-string{ 37 | color: #FF5050; 38 | font-weight: bold; 39 | } 40 | .hit { 41 | transform-origin: top; 42 | } 43 | .fragment-artefacts { 44 | /* By default UL element has margin set, but margins are not included 45 | * when calculating element dimensions (there's no box-sizing 46 | * property value that would help), so we remove the margins and use 47 | * padding instead. 48 | */ 49 | margin-top: 0px; 50 | padding-top: 1ex; 51 | margin-bottom: 0px; 52 | padding-bottom: 1ex; 53 | padding-left: 0.3em; /* Same as span.description. */ 54 | } 55 | -------------------------------------------------------------------------------- /public/pastelyzer.js: -------------------------------------------------------------------------------- 1 | function init_keys() { 2 | function add_stylesheet(content) { 3 | var style = document.createElement('style'); 4 | style.innerHTML = content; 5 | document.head.appendChild(style); 6 | return style.sheet; 7 | } 8 | 9 | var context_visibility_style = null; 10 | function toggle_context_visibility() { 11 | if (!context_visibility_style) { 12 | context_visibility_style = 13 | add_stylesheet(".artefact .context { display: none; }"); 14 | context_visibility_style.disabled = false; 15 | } 16 | else { 17 | context_visibility_style.disabled = !context_visibility_style.disabled; 18 | } 19 | } 20 | 21 | var context_selection_style = null; 22 | function toggle_context_selection() { 23 | if (!context_selection_style) { 24 | context_selection_style = 25 | add_stylesheet(".artefact .context { -webkit-user-select: none; user-select: none }"); 26 | context_selection_style.disabled = false; 27 | } 28 | else { 29 | context_selection_style.disabled = !context_selection_style.disabled; 30 | } 31 | } 32 | 33 | window.onkeydown = function (e) { 34 | if (!e.repeat && !e.ctrlKey && !e.altKey && !e.shiftKey && !e.metaKey) { 35 | switch (e.key) { 36 | case "c": 37 | toggle_context_visibility(); 38 | break; 39 | case "s": 40 | toggle_context_selection(); 41 | break; 42 | } 43 | } 44 | }; 45 | } 46 | 47 | function init_websocket() { 48 | let websocket = null; 49 | let keepalive_timer = null; 50 | 51 | function do_send(message) { 52 | if (websocket && websocket.readyState === WebSocket.OPEN) { 53 | websocket.send(message); 54 | } 55 | else { 56 | console.log("Not sending a message: socket is not OPEN"); 57 | } 58 | } 59 | 60 | function ping() { 61 | do_send(JSON.stringify({type: "ping"})); 62 | } 63 | 64 | function addHit(message, animate=true) { 65 | let container = document.getElementById("output"); 66 | if (50 <= container.childElementCount) { 67 | container.lastChild.remove(); 68 | } 69 | 70 | let stub = document.createElement("div"); 71 | stub.innerHTML = message; 72 | let hit = stub.firstChild; 73 | container.insertBefore(hit, container.firstChild); 74 | if (animate && hit.animate) { 75 | hit.animate([{ height: "0px", 76 | transform: "scaleY(0)" }, 77 | { height: hit.clientHeight + "px", 78 | transform: "scaleY(1)" }], 79 | { duration: 500, 80 | easing: "ease-out" }); 81 | } 82 | } 83 | 84 | function uniquify_url(str, param="x") { 85 | let index = str.indexOf("?"); 86 | let base = 0 < index ? str.slice(0, index) : str; 87 | let d = new Date().toJSON().replace(/[^\d]/g, ""); 88 | return base + "?" + param + "=" + d; 89 | } 90 | 91 | function connect() { 92 | let url = "ws://" + window.location.host + "/ws"; 93 | let conn = new WebSocket(url); 94 | 95 | conn.onerror = function(evt) { 96 | console.log("Websocket error: ", evt); 97 | }; 98 | 99 | conn.onopen = function(evt) { 100 | if (websocket) { 101 | console.log("Unexpectedly connected additional socket", evt.target); 102 | } 103 | else { 104 | console.log("Connected", evt.target); 105 | websocket = evt.target; 106 | if (keepalive_timer) { 107 | console.log("keepalive_timer active in conn.onopen!"); 108 | window.clearInterval(keepalive_timer); 109 | } 110 | keepalive_timer = window.setInterval(ping, 150 * 1000); 111 | } 112 | }; 113 | 114 | conn.onclose = function(evt) { 115 | if (!websocket || evt.target === websocket) { 116 | console.log("Disconnected", evt.target); 117 | websocket = null; 118 | window.clearInterval(keepalive_timer); 119 | keepalive_timer = null; 120 | 121 | window.setTimeout(connect, 10 * 1000); 122 | } 123 | else { 124 | console.log("Disconnected dangling socket", evt.target); 125 | } 126 | }; 127 | 128 | conn.onmessage = function(evt) { 129 | let msg = JSON.parse(evt.data); 130 | if ("type" in msg) { 131 | switch (msg.type) { 132 | case "add_hit": 133 | addHit(msg.data); 134 | break; 135 | case "bulk_add_hit": 136 | addHit(msg.data, false); 137 | break; 138 | case "reload": 139 | // Reload the Page itself. 140 | window.location.reload(); 141 | break; 142 | case "reload-css": 143 | // Reload CSS. 144 | let links = document.getElementsByTagName("link"); 145 | for (let i = 0; i < links.length; i++) { 146 | let link = links[i]; 147 | if (link.getAttribute("type") == "text/css") { 148 | link.href = uniquify_url(link.href); 149 | } 150 | } 151 | break; 152 | case "reload-js": 153 | // Reload scripts. 154 | let scripts = document.getElementsByTagName("script"); 155 | for (let i = 0; i < scripts.length; i++) { 156 | let script = scripts[i]; 157 | script.src = uniquify_url(script.src); 158 | } 159 | break; 160 | case "pong": 161 | // Do nothing for now. 162 | break; 163 | default: 164 | console.log("Don't know how to handle", msg); 165 | break; 166 | } 167 | } 168 | else { 169 | console.log("Received malformed message", msg); 170 | } 171 | }; 172 | 173 | return conn; 174 | } 175 | 176 | connect(); 177 | } 178 | 179 | function init() { 180 | init_websocket(); 181 | init_keys(); 182 | } 183 | 184 | window.addEventListener("load", init, false); 185 | -------------------------------------------------------------------------------- /src/circl-paste.lisp: -------------------------------------------------------------------------------- 1 | (in-package #:pastelyzer) 2 | 3 | (defgeneric remote-content-location (instance)) 4 | 5 | (defclass remote-content () 6 | ()) 7 | 8 | (defclass http-content (remote-content) 9 | ((uri 10 | :initarg :uri 11 | :reader remote-content-location 12 | :type (or puri:uri string)))) 13 | 14 | (defmethod print-object ((object http-content) (stream t)) 15 | (print-unreadable-object (object stream :type t :identity t) 16 | (puri:render-uri (remote-content-location object) stream))) 17 | 18 | (defclass link-only-http-content (http-content) 19 | ()) 20 | 21 | (defmethod content-body ((content link-only-http-content)) 22 | nil) 23 | 24 | (defclass content (binary-fragment) 25 | ((id 26 | :initarg :id 27 | :reader content-id 28 | :type unsigned-byte) 29 | (body 30 | :reader content-body))) 31 | 32 | (defmethod print-object ((object content) (stream t)) 33 | (print-unreadable-object (object stream :type t :identity t) 34 | (format stream "~A (~/fmt:nbytes/)" 35 | (content-id object) 36 | (length (content-body object))))) 37 | 38 | (defgeneric paste-source (paste) 39 | (:documentation "Returns 3 values: Source name, URL for the paste 40 | and URL for raw paste content (on the source).")) 41 | 42 | (defgeneric paste-origins (paste) 43 | (:documentation "Return a list of places where PASTE can be 44 | retrieved from.")) 45 | 46 | (defmethod paste-source ((paste t)) 47 | (values "Unknown" nil nil)) 48 | 49 | (defclass paste () 50 | ((id 51 | :initarg :id 52 | :reader paste-id 53 | :type (or null unsigned-byte)) 54 | (provider 55 | :initarg :provider 56 | :reader paste-provider 57 | :type string) 58 | (provider-id 59 | :initarg :provider-id 60 | :reader paste-provider-id 61 | :type string) 62 | (content 63 | :initarg :content 64 | :reader paste-content 65 | :type (or null content)))) 66 | 67 | (defmethod print-object ((paste paste) (stream t)) 68 | (if (or *print-escape* *print-readably*) 69 | (print-unreadable-object (paste stream :type t :identity t) 70 | (format stream "~A" (paste-id paste)) 71 | (when-let (content (paste-content paste)) 72 | (format stream " -> ~A (~/fmt:nbytes/)" 73 | (content-id content) 74 | (length (content-body content))))) 75 | (format stream "~A ~A~@[ -> ~A~]" 76 | (class-name (class-of paste)) 77 | (paste-id paste) 78 | (when-let (content (paste-content paste)) 79 | (content-id content))))) 80 | 81 | (defmethod paste-source ((paste paste)) 82 | (values 83 | (concatenate 'string (paste-provider paste) ":" (paste-provider-id paste)) 84 | nil 85 | nil)) 86 | 87 | ;;; Two convenience methods. 88 | (defmethod content-id ((paste paste)) 89 | (if-let (content (paste-content paste)) 90 | (content-id content) 91 | (error "~S has no content" paste))) 92 | 93 | (defmethod content-body ((paste paste)) 94 | (if-let (content (paste-content paste)) 95 | (content-body content) 96 | (error "~S has no content" paste))) 97 | 98 | (defmethod paste-content ((content content)) 99 | content) 100 | 101 | (defclass circl-paste (paste) 102 | () 103 | (:default-initargs 104 | :provider "circl")) 105 | 106 | (defun parse-circl-paste-id (string) 107 | (ppcre:register-groups-bind (site id) 108 | ("/([^/]+)/\\d{4}/\\d{2}/\\d{2}/(.+)\\.gz$" string) 109 | (return-from parse-circl-paste-id (values site id))) 110 | (error "Unrecognized paste file: ~S" string)) 111 | 112 | (defmethod paste-origin ((paste circl-paste)) 113 | (circl-provider-id-to-url (paste-provider-id paste))) 114 | 115 | (defun circl-provider-id-to-url (provider-id) 116 | (multiple-value-bind (site id) 117 | (parse-circl-paste-id provider-id) 118 | (flet ((url (scheme host &rest path) 119 | (make-instance 'puri:uri 120 | :scheme scheme 121 | :host host 122 | :path (apply #'concatenate 'string path)))) 123 | (cond ((or (string= "pastebin.com_pro" site) 124 | (string= "pastebin.com" site)) 125 | (values 126 | (make-instance 'http-content 127 | :uri (url :https "pastebin.com" "/raw/" id)) 128 | (make-instance 'link-only-http-content 129 | :uri (url :https "pastebin.com" "/" id)))) 130 | 131 | ((string= "lpaste.net" site) 132 | (values 133 | (make-instance 'http-content 134 | :uri (url :http site "/raw/" id)) 135 | (make-instance 'link-only-http-content 136 | :uri (url :http site "/" id)))) 137 | 138 | ((string= "gist.github.com" site) 139 | (destructuring-bind (user hash) 140 | (split-sequence #\_ id) 141 | (values 142 | (make-instance 'link-only-http-content 143 | :uri (url :https site "/" user "/" hash))))) 144 | 145 | ((string= "snipplr.com" site) 146 | (destructuring-bind (snipplr-id slug tail) 147 | (split-sequence #\_ id) 148 | (when (string/= "" tail) 149 | (warn "Unexpected snipplr.com paste id: ~S" id)) 150 | (values 151 | (make-instance 'link-only-http-content 152 | :uri (url :http site 153 | "/view/" snipplr-id "/" slug "/"))))) 154 | 155 | ((or (string= "paste.debian.net" site) 156 | (string= "ideone.com" site)) 157 | (values 158 | (make-instance 'http-content 159 | :uri (url :http site "/plain/" id)) 160 | (make-instance 'link-only-http-content 161 | :uri (url :http site "/" id)))) 162 | 163 | ((string= "slexy.org" site) 164 | (values 165 | (make-instance 'link-only-http-content 166 | :uri (url :https site "/view/" id)))) 167 | 168 | ((string= "paste.org.ru" site) 169 | (values 170 | (make-instance 'link-only-http-content 171 | :uri (url :http site "/?" id)))) 172 | 173 | ((string= "paste.opensuse.org" site) 174 | (values 175 | (make-instance 'http-content 176 | :uri (url :http site "/view/raw/" id)) 177 | (make-instance 'link-only-http-content 178 | :uri (url :http site "/" id)))) 179 | 180 | ((string= "kpaste.net" site) 181 | (values 182 | (make-instance 'http-content 183 | :uri (url :http site "/" id "?raw")) 184 | (make-instance 'link-only-http-content 185 | :uri (url :http site "/" id)))) 186 | 187 | ((string= "pastebin.ru" site) 188 | (values 189 | (make-instance 'http-content 190 | :uri (url :http site "/" id "/d/")) 191 | (make-instance 'link-only-http-content 192 | :uri (url :http site "/" id)))) 193 | 194 | ((or (string= "justpaste.it" site) 195 | (string= "paste.kde.org" site)) 196 | (values 197 | (make-instance 'link-only-http-content 198 | :uri (url :https site "/" id)))) 199 | 200 | (t 201 | (values 202 | (make-instance 'link-only-http-content 203 | :uri (url :http site "/" id)))))))) 204 | 205 | (defmethod paste-source ((paste circl-paste)) 206 | (multiple-value-bind (first other) 207 | (paste-origin paste) 208 | (let* ((first-url (remote-content-location first)) 209 | (host (puri:uri-host first-url))) 210 | (if other 211 | (values host (remote-content-location other) first-url) 212 | (values host first-url nil))))) 213 | 214 | (defclass web-server-state () 215 | ((host 216 | :initarg :host 217 | :reader web-server-state-host) 218 | (cookie-jar 219 | :initarg :cookie-jar 220 | :reader web-server-state-cookie-jar 221 | :initform (make-instance 'drakma:cookie-jar)))) 222 | 223 | (defclass rate-controlled-web-server-state (web-server-state) 224 | ((interval 225 | :initarg :interval 226 | :type fixnum 227 | :initform (* 8 internal-time-units-per-second) 228 | :documentation "Minimum time between requests using this state, in 229 | INTERNAL-TIME-UNITS-PER-SECOND.") 230 | (last-activity 231 | :type unsigned-byte 232 | :initform 0))) 233 | 234 | (defmethod print-object ((object web-server-state) (stream t)) 235 | (print-unreadable-object (object stream :type t :identity t) 236 | (princ (web-server-state-host object) stream))) 237 | 238 | (defmethod fetch-web-page :around ((state rate-controlled-web-server-state) 239 | (uri t) 240 | &key &allow-other-keys) 241 | (with-slots (interval last-activity) 242 | state 243 | (let ((time (get-internal-real-time)) 244 | (next-activity (+ last-activity interval))) 245 | (when (< time next-activity) 246 | ;; Pause for INTERVAL + 30%. 247 | (let ((seconds (/ (+ (- next-activity time) 248 | (random (* interval 0.3))) 249 | internal-time-units-per-second))) 250 | (msg :debug "Waiting ~,3Fs on ~A" seconds state) 251 | (sleep seconds))) 252 | (unwind-protect 253 | (call-next-method) 254 | (setf last-activity (get-internal-real-time)))))) 255 | 256 | (defmethod fetch-web-page ((state web-server-state) (uri puri:uri) 257 | &rest keys &key &allow-other-keys) 258 | (with-slots (cookie-jar) 259 | state 260 | (multiple-value-bind (body status headers real-uri stream closep reason) 261 | (apply #'drakma:http-request uri 262 | :want-stream t 263 | :redirect 1 264 | :user-agent *default-http-user-agent* 265 | :cookie-jar cookie-jar 266 | keys) 267 | (unwind-protect 268 | (cond ((= 200 status) 269 | (values (read-stream-content-into-byte-vector 270 | (flexi-streams:flexi-stream-stream body)) 271 | real-uri 272 | headers)) 273 | (t 274 | (error "Unexpected response: ~S (~A) from ~A" 275 | status reason uri))) 276 | (when closep 277 | (close stream)))))) 278 | 279 | ;;; XXX: This is not thread-safe! 280 | (defvar *server-states* (make-hash-table :test 'equal)) 281 | 282 | (defmethod initial-server-state-for ((content http-content)) 283 | (make-instance 'rate-controlled-web-server-state 284 | :host (puri:uri-host (remote-content-location content)))) 285 | 286 | (defmethod content-body ((content http-content)) 287 | (let ((key (puri:uri-host (remote-content-location content)))) 288 | (fetch-web-page (or (gethash key *server-states*) 289 | (setf (gethash key *server-states*) 290 | (initial-server-state-for content))) 291 | (remote-content-location content)))) 292 | 293 | (defclass fetched-content (http-content) 294 | ((origin 295 | :initarg :origin 296 | :reader fetched-content-origin 297 | :type (or null string) 298 | :documentation "Where the data was fetched from."))) 299 | 300 | (defmethod retrieve-original ((paste circl-paste)) 301 | (let ((origin (paste-origin paste))) 302 | (unless (find (puri:uri-host (remote-content-location origin)) 303 | *ignored-paste-sites* 304 | :test #'string=) 305 | (multiple-value-bind (data uri headers) 306 | (content-body origin) 307 | (when data 308 | (msg :debug "Retrieved ~A (~/fmt:nbytes/)" 309 | uri (length data)) 310 | (return-from retrieve-original 311 | (values data uri headers))))))) 312 | 313 | (defclass web-paste (paste) 314 | () 315 | (:default-initargs 316 | :provider "web")) 317 | 318 | (defmethod paste-source ((paste web-paste)) 319 | (let* ((provider-id (paste-provider-id paste)) 320 | (uri (puri:uri provider-id))) 321 | (values (puri:uri-host uri) provider-id uri))) 322 | 323 | (defun store-fixed-content (&key broken-id uri data) 324 | (declare (type unsigned-byte broken-id) 325 | (type puri:uri uri) 326 | (type (or null vector) data)) 327 | (let ((url (puri:render-uri uri nil)) 328 | (time (local-time:now))) 329 | (db:with-connection () 330 | (db:with-transaction () 331 | (multiple-value-bind (paste-id content-id) 332 | (db:store-paste data "web" url time) 333 | (db:insert-content-fix :broken-id broken-id 334 | :fixed-id content-id) 335 | (msg :notice "Fixed ~A -> ~A (~A)" broken-id content-id url) 336 | (make-instance 'web-paste 337 | :id paste-id 338 | :provider-id url 339 | :content (make-instance 'content 340 | :id content-id 341 | :body data))))))) 342 | 343 | (defun fix-paste (paste) 344 | (multiple-value-bind (data uri headers) 345 | (retrieve-original paste) 346 | (declare (ignore headers)) 347 | (let ((broken-id (content-id paste))) 348 | (cond (data 349 | (store-fixed-content :broken-id broken-id 350 | :data data 351 | :uri uri)) 352 | (t 353 | (msg :notice "Can't fix ~A" paste) 354 | (db:with-connection () 355 | (db:register-broken-content broken-id)) 356 | nil))))) 357 | 358 | (defun fetch-broken-pastes-loop (in out) 359 | ;; XXX: This is very much synchronous, i.e., broken pastes are 360 | ;; fetched (including waiting for server cooldown) and stored one by 361 | ;; one. 362 | (with-logged-warnings 363 | (loop for paste = (receive-message in) 364 | while paste 365 | do (handler-case 366 | (when-let (fixed (fix-paste paste)) 367 | (send-message out fixed)) 368 | (serious-condition (condition) 369 | (msg :error "Problem fixing ~A: ~A" paste condition) 370 | (db:with-connection () 371 | (db:register-broken-content (content-id paste)))))))) 372 | -------------------------------------------------------------------------------- /src/circl-zmq.lisp: -------------------------------------------------------------------------------- 1 | (in-package #:pastelyzer) 2 | 3 | (defun parse-circl-zmq-message (string) 4 | (multiple-value-bind (list position) 5 | (split-sequence #\space string :count 2) 6 | (destructuring-bind (code file) 7 | list 8 | (values (parse-integer code :junk-allowed nil) 9 | file 10 | (let ((len (length string))) 11 | (if (< position len) 12 | (base64:base64-string-to-usb8-array 13 | (make-array (- len position) 14 | :element-type (array-element-type string) 15 | :displaced-to string 16 | :displaced-index-offset position)) 17 | nil)))))) 18 | 19 | (defclass zmq-message () 20 | ((code 21 | :initarg :code 22 | :reader zmq-message-code 23 | :type (or fixnum string)) 24 | (file 25 | :initarg :file 26 | :reader zmq-message-file 27 | :type string) 28 | (data 29 | :initarg :data 30 | :reader zmq-message-data 31 | :type (or null (simple-array (unsigned-byte 8) (*)))) 32 | (time 33 | :initarg :time 34 | :reader zmq-message-time 35 | :initform (local-time:now) 36 | :type local-time:timestamp))) 37 | 38 | (defmethod print-object ((object zmq-message) (stream t)) 39 | (print-unreadable-object (object stream :type t :identity t) 40 | (format stream "~A '~A' (~D)" 41 | (zmq-message-code object) 42 | (zmq-message-file object) 43 | (length (zmq-message-data object))))) 44 | 45 | (defun process-circl-zmq-message (string handler) 46 | (handler-case 47 | (multiple-value-bind (code file data) 48 | (parse-circl-zmq-message string) 49 | ;; XXX: For now ignore the messages with code 101 -- those are 50 | ;; messages with no paste payload preceding messages with code 51 | ;; 102 which have the pastes (most of the time). 52 | (when (= 102 code) 53 | (msg :debug "<< ~D ~A ~/fmt:nbytes/" code file (length data)) 54 | (funcall handler 55 | (make-instance 'zmq-message 56 | :code code 57 | :file file 58 | :data data)))) 59 | (error (condition) 60 | (warn "~A while processing: ~S" condition string)))) 61 | 62 | (defun fetch-circl-pastes (address handler) 63 | (pzmq:with-socket socket :sub 64 | (handler-case 65 | (progn 66 | (pzmq:connect socket address) 67 | (loop 68 | (multiple-value-bind (string more) 69 | (pzmq:recv-string socket :encoding :ascii) 70 | (process-circl-zmq-message string handler) 71 | (when more 72 | (warn "Not handling multi-part message"))))) 73 | (error (condition) 74 | (warn "Problem receiving messages from ~A: ~A" 75 | address condition))))) 76 | 77 | (defgeneric store-in-db (paste) 78 | (:documentation "Store PASTE in database.")) 79 | 80 | (defmethod store-in-db ((paste zmq-message)) 81 | (let ((body (when-let ((data (zmq-message-data paste))) 82 | (handler-case 83 | (gunzip data) 84 | (end-of-file () 85 | (warn "Broken paste: ~A (~/fmt:nbytes/)" 86 | (zmq-message-file paste) 87 | (length (zmq-message-data paste))) 88 | (return-from store-in-db nil)) 89 | (error (condition) 90 | (warn "~A: ~A" paste condition) 91 | (return-from store-in-db nil)))))) 92 | (db:with-connection () 93 | (let ((provider "circl") 94 | (provider-id (zmq-message-file paste)) 95 | (time (zmq-message-time paste))) 96 | (multiple-value-bind (paste-id content-id) 97 | (if body 98 | (db:store-paste body provider provider-id time) 99 | (values (db:insert-paste provider provider-id :null time) 100 | nil)) 101 | (msg :info "~D -> ~A : circl : ~A (~/fmt:nbytes/ -> ~/fmt:nbytes/)" 102 | paste-id content-id 103 | (zmq-message-file paste) 104 | (length (zmq-message-data paste)) 105 | (length body)) 106 | (make-instance 'circl-paste 107 | :id paste-id 108 | :provider provider 109 | :provider-id provider-id 110 | :content (if content-id 111 | (make-instance 'content 112 | :id content-id 113 | :body body) 114 | nil))))))) 115 | -------------------------------------------------------------------------------- /src/cli.lisp: -------------------------------------------------------------------------------- 1 | (in-package #:pastelyzer) 2 | 3 | (eval-when (:compile-toplevel :load-toplevel :execute) 4 | (defconstant ESC (code-char 27)) 5 | 6 | (defun resolve-sgr-code (code) 7 | (or (typecase code 8 | (integer 9 | code) 10 | (keyword 11 | (case code 12 | (:clear 0) 13 | (:bright 1) 14 | (:dim 2) 15 | (:reverse 7) 16 | (:normal 22) 17 | (:black 30) 18 | (:red 31) 19 | (:green 32) 20 | (:yellow 33) 21 | (:blue 34) 22 | (:magenta 35) 23 | (:cyan 36) 24 | (:white 37)))) 25 | (error "Invalid SGR code: ~S" code))) 26 | 27 | (defun sgr (stream &rest codes) 28 | (format stream "~C[~{~A~^;~}m" ESC (mapcar #'resolve-sgr-code codes)))) 29 | 30 | (define-compiler-macro sgr (&whole form stream &rest codes &environment env) 31 | (cond ((every (lambda (code) (constantp code env)) 32 | codes) 33 | `(format ,stream ,(apply #'sgr nil codes))) 34 | (t 35 | form))) 36 | 37 | (defclass cli-job (job) 38 | ()) 39 | 40 | (defmethod analysable-parts ((path pathname) (job cli-job)) 41 | (list (make-instance 'binary-fragment 42 | :body (read-file-into-byte-vector path)))) 43 | 44 | (defmethod analysable-parts ((input (eql :stdin)) (job cli-job)) 45 | #-ccl 46 | (let ((body (read-stream-content-into-byte-vector *standard-input*))) 47 | (list (make-instance 'binary-fragment :body body))) 48 | #+ccl 49 | (let ((stdin (two-way-stream-input-stream *terminal-io*))) 50 | (setf (ccl::stream-external-format stdin) 51 | (ccl:make-external-format :line-termination :unix 52 | :character-encoding nil)) 53 | (let ((body (read-stream-content-into-string stdin))) 54 | (list (make-instance 'binary-fragment 55 | :body (map '(vector (unsigned-byte 8)) 56 | #'char-code 57 | body)))))) 58 | 59 | (defmethod noteworthy-artefact-p ((target t) (ctx cli-job)) 60 | nil) 61 | 62 | (defmethod noteworthy-artefact-p ((target string-artefact) (ctx cli-job)) 63 | t) 64 | 65 | (defmethod render-node 66 | ((view (eql :mono-term)) (node artefact) (job cli-job) (count integer) 67 | &optional (stream *standard-output*)) 68 | (let ((imp (noteworthy-artefact-p node job))) 69 | (multiple-value-bind (start end) 70 | (artefact-source-seq-bounds node) 71 | (format stream "~A..~A ~[~:;~:*(+~D) ~]~A: ~A" 72 | start end (1- count) 73 | (type-of node) 74 | (one-line (if imp 75 | (artefact-source node) 76 | (artefact-description node)) 77 | :limit 72 78 | :continuation "…" 79 | :mode :squeeze))))) 80 | 81 | (defmethod render-node 82 | ((view (eql :color-term)) (node artefact) (job cli-job) (count integer) 83 | &optional (stream *standard-output*)) 84 | (let ((imp (noteworthy-artefact-p node job))) 85 | (multiple-value-bind (start end) 86 | (artefact-source-seq-bounds node) 87 | (format stream "~A..~A ~[~:;~:*(+~D) ~]" start end (1- count)) 88 | (sgr stream (if imp :green :white)) 89 | (format stream "~A" (type-of node)) 90 | (sgr stream :dim :white) 91 | (write-string ": ") 92 | (cond (imp 93 | (write-string (artefact-context-before node :limit 16 :bol t) 94 | stream) 95 | (sgr stream :clear :bright :green) 96 | (write-string (one-line (artefact-source node) 97 | :limit 48 98 | :continuation "…" 99 | :mode :squeeze) 100 | stream) 101 | (sgr stream :clear :dim :white) 102 | (write-string (artefact-context-after node :limit 16 :eol t) 103 | stream)) 104 | (t 105 | (write-string (one-line (artefact-description node) 106 | :limit 48 107 | :continuation "…" 108 | :mode :squeeze) 109 | stream))) 110 | (sgr stream :clear)))) 111 | 112 | (defun render-tree (roots &key (stream *standard-output*) 113 | (children-fn (constantly '())) 114 | (print-fn #'princ) 115 | (dedup t)) 116 | (labels ((draw-node (node prefix lastp count) 117 | (format stream "~&~{~A~}~:[├~;└~]─ " prefix lastp) 118 | (funcall print-fn node count stream) 119 | (fresh-line stream) 120 | (draw-children (funcall children-fn node) 121 | (append prefix (list (if lastp " " "│ "))))) 122 | (draw-children (list prefix) 123 | (if dedup 124 | (let ((table (make-hash-table :test 'equal))) 125 | (loop for node in list 126 | for i upfrom 0 127 | for key = (artefact-key node) 128 | for value = (gethash key table) 129 | do (if value 130 | (incf (second value)) 131 | (setf (gethash key table) 132 | (list i 1 node)))) 133 | (loop for (list . more) 134 | on (sort (hash-table-values table) #'< :key #'first) 135 | do (draw-node 136 | (third list) prefix (not more) (second list)))) 137 | (loop for (child . more) on list 138 | do (draw-node child prefix (not more) 1))))) 139 | (let ((*print-length* 16) 140 | (*print-pretty* nil)) 141 | (dolist (root roots) 142 | (format stream "~A~%" root) 143 | (draw-children (funcall children-fn root) nil))))) 144 | 145 | (defvar *export-artefacts-counter* nil 146 | "The number of last exported artefact if exporting artefacts.") 147 | 148 | ;;; Not really an artefact, but an easy (the only) way to add 149 | ;;; something to the rendered artefact tree. 150 | (defclass exported-artefact () 151 | ((path 152 | :initarg :path 153 | :reader exported-artefact-path) 154 | (children 155 | :initarg :children 156 | :reader exported-artefact-children 157 | :type list))) 158 | 159 | (defmethod artefact-source-seq-start ((node exported-artefact)) 160 | -1) 161 | 162 | (defmethod extract-artefacts ((node exported-artefact) (job cli-job)) 163 | (exported-artefact-children node)) 164 | 165 | (defmethod artefact-key ((node exported-artefact)) 166 | node) 167 | 168 | (defmethod render-node 169 | ((view (eql :mono-term)) (node exported-artefact) (job cli-job) count 170 | &optional (stream *standard-output*)) 171 | (declare (ignore count)) 172 | (format stream "* Exported as ~A" (exported-artefact-path node))) 173 | 174 | (defmethod render-node 175 | ((view (eql :color-term)) (node exported-artefact) (job cli-job) count 176 | &optional (stream *standard-output*)) 177 | (declare (ignore count)) 178 | (write-string "* Exported as " stream) 179 | (sgr stream :dim :green) 180 | (princ (exported-artefact-path node) stream) 181 | (sgr stream :clear)) 182 | 183 | (defun export-artefact (subject artefact) 184 | (let* ((bytes (fragment-body (embedded-binary-bytes artefact))) 185 | (hash (ironclad:digest-sequence 'ironclad:sha1 bytes)) 186 | (filename (format nil "~A-~D-~/fmt:bytes/" 187 | (typecase subject 188 | (pathname 189 | (file-namestring subject)) 190 | (t 191 | subject)) 192 | (incf *export-artefacts-counter*) 193 | (subseq hash 0 4)))) 194 | (with-open-file (out filename 195 | :direction :output 196 | :element-type '(unsigned-byte 8) 197 | :if-exists :supersede) 198 | (write-sequence bytes out) 199 | (file-namestring out)))) 200 | 201 | (defmethod extract-artefacts :around ((node embedded-binary) (job cli-job)) 202 | (let ((result (call-next-method))) 203 | (if *export-artefacts-counter* 204 | (list (make-instance 'exported-artefact 205 | :path (export-artefact (job-subject job) node) 206 | :children result)) 207 | result))) 208 | 209 | (defun walk (job view dedup) 210 | (render-tree (list (job-subject job)) 211 | :children-fn (lambda (node) 212 | (sort (extract-artefacts node job) 213 | #'< :key #'artefact-source-seq-start)) 214 | :print-fn (lambda (node count stream) 215 | (render-node view node job count stream)) 216 | :dedup dedup)) 217 | 218 | (defun process-item (item view dedup) 219 | (handler-case 220 | (walk (make-instance 'cli-job :subject item) view dedup) 221 | (error (condition) 222 | (format *error-output* "~&~A~%" condition)))) 223 | 224 | (defun run-cli (&key paths 225 | (colour (sys:isatty *standard-output*)) 226 | export 227 | (dedup t) 228 | &allow-other-keys) 229 | (let ((*export-artefacts-counter* (if export 0 nil))) 230 | (loop for (item . more) on paths 231 | collect (process-item (if (string= "-" item) 232 | :stdin 233 | (parse-namestring item)) 234 | (if colour :color-term :mono-term) 235 | dedup) 236 | when more do (terpri)))) 237 | 238 | (defun update-status (stream) 239 | (write-string #.(format nil "~C[K~C" ESC #\return) stream) 240 | (finish-output stream)) 241 | 242 | (defun process-unprocessed (&key (batch-size 1000000)) 243 | "Re-process contents that have not been processed by current version." 244 | (check-type batch-size (integer 0 *)) 245 | (flet ((process-content (id size) 246 | (format *standard-output* "PROC ~D (~/fmt:nbytes/)" id size) 247 | (update-status *standard-output*) 248 | #+sbcl 249 | (when (< *huge-fragment-bytes* size) 250 | ;; Reduce the chance of running out of memory when 251 | ;; processing big documents. 252 | (sb-ext:gc :full t)) 253 | (handler-case 254 | (let ((content (fetch-content id))) 255 | (analyze content)) 256 | (error (condition) 257 | (msg :error "Failed to process content ~D: ~A" 258 | id condition))))) 259 | (db:with-connection () 260 | (loop 261 | (format *standard-output* "WAIT Fetching next batch") 262 | (update-status *standard-output*) 263 | (when (zerop (db:map-unprocessed-content-ids #'process-content 264 | :limit batch-size)) 265 | (return)))))) 266 | -------------------------------------------------------------------------------- /src/compat.lisp: -------------------------------------------------------------------------------- 1 | (in-package #:pastelyzer) 2 | 3 | #+sbcl 4 | (progn 5 | (defun bytes-to-string (bytes external-format 6 | &optional (errorp nil) 7 | (error-value nil)) 8 | (handler-bind ((sb-int:character-decoding-error 9 | #'(lambda (condition) 10 | (declare (ignore condition)) 11 | (unless errorp 12 | (return-from bytes-to-string error-value))))) 13 | (sb-ext:octets-to-string bytes :external-format external-format)))) 14 | 15 | #+ccl 16 | (progn 17 | (defun bytes-to-string (bytes external-format 18 | &optional (errorp nil) 19 | (error-value nil)) 20 | (handler-bind ((ccl:decoding-problem 21 | #'(lambda (condition) 22 | (if errorp 23 | (error condition) 24 | (return-from bytes-to-string error-value)))) 25 | (type-error 26 | ;; Temporary workaround for CCL's UTF-8 decoder 27 | ;; issues (the value NNNNNNN is not of the 28 | ;; expected type (MOD 1114112)). 29 | #'(lambda (condition) 30 | (if errorp 31 | (error condition) 32 | (return-from bytes-to-string error-value))))) 33 | (ccl:decode-string-from-octets bytes :external-format external-format)))) 34 | 35 | #-(or sbcl ccl) 36 | (defun bytes-to-string (bytes external-format 37 | &optional (errorp nil) 38 | (error-value nil)) 39 | (unless (eq :utf-8 external-format) 40 | (error "Unsupported string encoding: ~S" external-format)) 41 | 42 | (handler-bind ((trivial-utf-8:utf-8-decoding-error 43 | #'(lambda (condition) 44 | (declare (ignore condition)) 45 | (unless errorp 46 | (return-from bytes-to-string error-value))))) 47 | (trivial-utf-8:utf-8-bytes-to-string bytes))) 48 | 49 | (setf (documentation 'bytes-to-string 'function) 50 | "Decode BYTES into a string using encoding EXTERNAL-FORMAT. If 51 | there are decoding errors the behaviour depends on the value of 52 | ERRORP parameter: if it is NIL then the ERROR-VALUE is returned; 53 | otherwise the decoding error is not handled. 54 | 55 | Returns the decoded string.") 56 | 57 | (defmacro with-graceful-stop-flag ((var) &body body) 58 | `(let ((.should-stop. nil)) 59 | (declare (special .should-stop.)) 60 | (symbol-macrolet ((,var (locally 61 | (declare (special .should-stop.)) 62 | .should-stop.))) 63 | ,@body))) 64 | 65 | (defun gracefully-stop (thread) 66 | (flet ((thunk () 67 | (locally (declare (special .should-stop.)) 68 | (setf .should-stop. t)))) 69 | (bt:interrupt-thread thread #'thunk))) 70 | 71 | (defmacro async ((&rest bindings) &body body) 72 | `(#+sbcl sb-thread:make-thread 73 | #-sbcl bt:make-thread 74 | (lambda () 75 | ;; We don't use the :initial-bindings parameter of 76 | ;; bt:make-thread because it uses EVAL. 77 | (let ,bindings 78 | (handler-case (progn ,@body) 79 | (error (condition) 80 | (msg :error "Async error: ~A" condition))))))) 81 | 82 | #+sbcl 83 | (defun resolve-hostname (string) 84 | (handler-case 85 | (mapcar #'(lambda (quad) 86 | (declare (type (simple-array (unsigned-byte 8) (4)) quad)) 87 | (ip:ipv4-address-from-quad (aref quad 0) 88 | (aref quad 1) 89 | (aref quad 2) 90 | (aref quad 3))) 91 | (sb-bsd-sockets:host-ent-addresses 92 | (sb-bsd-sockets:get-host-by-name string))) 93 | ((or sb-bsd-sockets:host-not-found-error sb-bsd-sockets:name-service-error) 94 | () 95 | nil))) 96 | 97 | #+ccl 98 | (defun resolve-hostname (string) 99 | (handler-case 100 | (mapcar (compose #'ip:ipv4-address #'ccl:socket-address-host) 101 | (ccl:resolve-address :host string 102 | :address-family :internet 103 | :singlep nil)) 104 | (ccl:socket-creation-error () 105 | nil))) 106 | 107 | #+sbcl 108 | (defmacro with-tcp-connection ((var &key host port) &body body) 109 | (with-gensyms (socket addr) 110 | `(let ((,socket (make-instance 'sb-bsd-sockets:inet-socket 111 | :type :stream 112 | :protocol :tcp)) 113 | (,addr (sb-bsd-sockets:host-ent-address 114 | (sb-bsd-sockets:get-host-by-name ,host)))) 115 | (unwind-protect 116 | (progn 117 | (sb-bsd-sockets:socket-connect ,socket ,addr ,port) 118 | (let ((,var (sb-bsd-sockets:socket-make-stream ,socket 119 | :input t 120 | :output t 121 | :auto-close t))) 122 | ,@body)) 123 | (sb-bsd-sockets:socket-close ,socket))))) 124 | 125 | #+ccl 126 | (defmacro with-tcp-connection ((var &key host port) &body body) 127 | `(ccl:with-open-socket (,var :type :stream 128 | :remote-host ,host 129 | :remote-port ,port) 130 | ,@body)) 131 | 132 | #+sbcl 133 | (defun (setf thread-name) (name &optional (thread sb-thread:*current-thread*)) 134 | (setf (sb-thread:thread-name thread) name)) 135 | 136 | #+ccl 137 | (defun (setf thread-name) (name &optional (thread ccl:*current-process*)) 138 | (setf (ccl:process-name thread) name)) 139 | 140 | #-(or ccl sbcl) 141 | (defun (setf thread-name) (name &optional thread) 142 | (let ((first-time (load-time-value (cons t nil)))) 143 | (when (car first-time) 144 | (setf (car first-time) nil) 145 | (warn "(SETF THREAD-NAME) not implemented for this implementation")))) 146 | 147 | #+sbcl 148 | (defun make-named-thread (fn name &rest args) 149 | (declare (type (or symbol function) fn)) 150 | (sb-thread:make-thread fn :name name :arguments args)) 151 | 152 | #-sbcl 153 | (progn 154 | (defmacro make-named-thread (fn name &rest args) 155 | `(chanl:pexec (:name ,name) 156 | (funcall ,fn ,@args))) 157 | (defun make-mailbox (&key name) 158 | (declare (ignore name)) 159 | (make-instance 'chanl:unbounded-channel)) 160 | (defun mailbox-empty-p (mailbox) 161 | (chanl:recv-blocks-p mailbox)) 162 | (defun receive-message (mailbox) 163 | (chanl:recv mailbox)) 164 | (defun send-message (mailbox message) 165 | (chanl:send mailbox message))) 166 | 167 | #+sbcl 168 | (defun inflate (blob) 169 | (chipz:decompress nil 'chipz:deflate blob)) 170 | 171 | #-sbcl 172 | (defun inflate (blob) 173 | (flexi-streams:with-output-to-sequence (out) 174 | (flexi-streams:with-input-from-sequence (in blob) 175 | (funcall (find-symbol "INFLATE-ZLIB-STREAM" '#:ql-gunzipper) in out)))) 176 | 177 | #+sbcl 178 | (defun gunzip (blob) 179 | (chipz:decompress nil 'chipz:gzip blob)) 180 | 181 | #-sbcl 182 | (defun gunzip (blob) 183 | (flexi-streams:with-output-to-sequence (out) 184 | (flexi-streams:with-input-from-sequence (in blob) 185 | (funcall (find-symbol "INFLATE-GZIP-STREAM" '#:ql-gunzipper) in out)))) 186 | -------------------------------------------------------------------------------- /src/config/cmd.lisp: -------------------------------------------------------------------------------- 1 | ;;; This file is only loaded for SBCL (see system definition) because CCL does 2 | ;;; not (yet) support :directory parameter to RUN-PROGRAM. 3 | 4 | (defpackage #:pastelyzer.config.cmd 5 | (:use #:common-lisp) 6 | (:import-from #:alexandria 7 | #:read-stream-content-into-string 8 | #:read-stream-content-into-byte-vector) 9 | (:import-from #:pastelyzer.log 10 | #:msg) 11 | (:local-nicknames (#:sink #:pastelyzer.config.sink) 12 | (#:filter #:pastelyzer.config.filter) 13 | (#:usr #:pastelyzer.config.user) 14 | (#:util #:pastelyzer.config.util) 15 | (#:loader #:pastelyzer.config.loader)) 16 | (:import-from #:pastelyzer.config.package 17 | #:user-package 18 | #:user-identifier 19 | #:user-identifier-p)) 20 | 21 | (in-package #:pastelyzer.config.cmd) 22 | 23 | (defclass cmd (sink:prototype) 24 | ((default-environment 25 | :reader cmd-sink-default-environment 26 | :type list 27 | :documentation "External processes are executed in a clean environment 28 | with only the environment variables listed here present (populated in 29 | INITIALIZE-INSTANCE :after method)."))) 30 | 31 | (defmethod initialize-instance :after ((object cmd) &key) 32 | ;; The default enviroment should be re-generated when configuration 33 | ;; changes (when we support that). 34 | (let ((home (namestring (merge-pathnames ""))) 35 | (server (if pastelyzer::*acceptor* 36 | (princ-to-string pastelyzer::*web-server-external-uri*) 37 | nil))) 38 | (setf (slot-value object 'default-environment) 39 | `(("PASTELYZER_HOME" . ,home) 40 | ,@(when server `(("PASTELYZER_SERVER" . ,server))))))) 41 | 42 | (defmethod sink:get-prototype ((name (eql (user-identifier "CMD-SINK")))) 43 | (make-instance 'cmd)) 44 | 45 | (defclass finished-process () 46 | ((artefact 47 | :initarg :artefact 48 | :reader finished-process-artefact) 49 | (status 50 | :initarg :status 51 | :reader finished-process-status) 52 | (stdout 53 | :initarg :stdout 54 | :reader finished-process-stdout) 55 | (stderr 56 | :initarg :stderr 57 | :reader finished-process-stderr))) 58 | 59 | (defmethod filter:extract ((field (eql 'usr::note)) (proc finished-process)) 60 | (pastelyzer:artefact-note (finished-process-artefact proc))) 61 | 62 | (defmethod filter:extract ((field (eql 'usr::stdout)) (proc finished-process)) 63 | (finished-process-stdout proc)) 64 | 65 | (defmethod filter:extract ((field (eql 'usr::stderr)) (proc finished-process)) 66 | (finished-process-stderr proc)) 67 | 68 | (defmethod filter:extract ((field (eql 'usr::status)) (proc finished-process)) 69 | (finished-process-status proc)) 70 | 71 | (defmethod sink:parse-sink-attribute 72 | ((proto cmd) (attribute (eql :target)) &rest args) 73 | (list* attribute 74 | (sink:check-args proto attribute args 75 | '((:type (member :artefact :document)))))) 76 | 77 | (defmethod sink:parse-sink-attribute 78 | ((proto cmd) (attribute (eql :command)) &rest args) 79 | ;; Value of this attribute is a list of strings and expressions that 80 | ;; evaluate to strings. The first string is the command to execute, 81 | ;; and all the rest are command-line parameters for this command. 82 | (unless args 83 | (error 'sink:missing-attribute-value 84 | :sink proto 85 | :attribute attribute)) 86 | (list attribute 87 | (if (every #'stringp args) 88 | args 89 | (mapcar #'util:parse-dynamic-attribute args)))) 90 | 91 | (defmethod sink:parse-sink-attribute 92 | ((proto cmd) (attribute (eql :stdin)) &rest args) 93 | ;; What to pass into the standard input of the process. This should 94 | ;; be an expression that evaluates to a string or byte vector (i.e., 95 | ;; artefact string representation, or, in case of embedded binaries 96 | ;; an expression that extracts the bytes). If the attribute is not 97 | ;; specified then nothing is written to the standard input of the 98 | ;; process. 99 | ;; 100 | ;; Should we provide a stream that emits EOF immediately, or 101 | ;; /dev/null which just emits zeroes? 102 | (unless (and (car args) (endp (cdr args))) 103 | (error 'sink:too-many-attribute-values 104 | :attribute attribute 105 | :sink proto)) 106 | (let ((input (first args))) 107 | (list attribute (util:parse-dynamic-attribute input attribute)))) 108 | 109 | (defmethod sink:parse-sink-attribute 110 | ((proto cmd) (attribute (eql :stdout)) &rest args) 111 | (list* attribute 112 | (sink:check-args 113 | proto attribute args 114 | '((:type (member :collect-string :collect-bytes :discard)))))) 115 | 116 | (defmethod sink:parse-sink-attribute 117 | ((proto cmd) (attribute (eql :stderr)) &rest args) 118 | (list* attribute 119 | (sink:check-args 120 | proto attribute args 121 | '((:type (member :collect-string :collect-bytes :discard)))))) 122 | 123 | (defmethod sink:parse-sink-attribute 124 | ((proto cmd) (attribute (eql :environment)) &rest args) 125 | (list attribute 126 | (loop for (name value) in args 127 | collect (cons name 128 | (util:parse-dynamic-attribute value attribute))))) 129 | 130 | (defmethod sink:parse-sink-attribute 131 | ((proto cmd) (attribute (eql :action)) &rest args) 132 | ;; Currently the only supported action is to pass the finished 133 | ;; process instance to the specified filter. 134 | (unless (every (lambda (arg) 135 | (typep (filter:get-filter arg) 'process-filter)) 136 | args) 137 | (error 'sink:invalid-attribute-type 138 | :attribute attribute 139 | :sink proto 140 | :typespec 'process-filter 141 | :value args)) 142 | (list attribute args)) 143 | 144 | (defmethod sink:attribute-value ((cfg cmd) (attribute (eql :target))) 145 | :artefact) 146 | 147 | (defmethod sink:attribute-value ((cfg cmd) (attribute (eql :environment))) 148 | '()) 149 | 150 | (defmethod sink:attribute-value ((cfg cmd) (attribute (eql :stdin))) 151 | :null) 152 | 153 | (defmethod sink:attribute-value ((cfg cmd) (attribute (eql :stdout))) 154 | :discard) 155 | 156 | (defmethod sink:attribute-value ((cfg cmd) (attribute (eql :stderr))) 157 | :discard) 158 | 159 | (defmethod sink:attribute-value ((cfg cmd) (attribute (eql :action))) 160 | nil) 161 | 162 | (defmethod sink:attribute-value ((cfg cmd) (attribute (eql :time-limit))) 163 | nil) 164 | 165 | ;;; XXX: We might want the binding of this be an instance of 166 | ;;; EXTERNAL-PROCESS, which would have slots for the (temporary) 167 | ;;; directory where it is running, temporary files that have been 168 | ;;; created (in this directory), etc. 169 | (defvar *cmd-dir* nil 170 | "Directory (temporary) where external command is being executed.") 171 | 172 | (defmethod dump-to-tmpfile ((seq vector) (directory pathname) &optional prefix) 173 | (let ((element-type (etypecase seq 174 | ((vector (unsigned-byte 8)) '(unsigned-byte 8)) 175 | (base-string 'base-char) 176 | (string 'character)))) 177 | (sys:with-temporary-file (out :directory directory 178 | :element-type element-type 179 | :prefix prefix) 180 | (write-sequence seq out) 181 | (pathname out)))) 182 | 183 | (defmethod dump-to-tmpfile ((fragment pastelyzer:binary-fragment) 184 | (directory pathname) 185 | &optional prefix) 186 | (sys:with-temporary-file (out :directory directory 187 | :element-type '(unsigned-byte 8) 188 | :prefix prefix) 189 | (write-sequence (pastelyzer:fragment-body fragment) out) 190 | (pathname out))) 191 | 192 | (defmethod dump-to-tmpfile ((fragment pastelyzer:string-fragment) 193 | (directory pathname) 194 | &optional prefix) 195 | (sys:with-temporary-file (out :directory directory 196 | :element-type 'character 197 | :prefix prefix) 198 | (write-sequence (pastelyzer:fragment-body fragment) out) 199 | (pathname out))) 200 | 201 | (defmethod dump-to-tmpfile ((artefact pastelyzer:artefact) 202 | (directory pathname) 203 | &optional prefix) 204 | (dump-to-tmpfile (pastelyzer:artefact-source artefact) directory prefix)) 205 | 206 | (defmethod dump-to-tmpfile ((datum (eql :null)) (directory pathname) 207 | &optional prefix) 208 | (declare (ignore prefix)) 209 | nil) 210 | 211 | (defmethod filter:generate-filter-function 212 | ((operator (eql (user-identifier "STORE-TMPFILE"))) &rest body) 213 | (check-type body (or null (cons string null))) 214 | (filter:make-function store-tmpfile (value cont) 215 | (unless *cmd-dir* 216 | (error "STORE-TMPFILE called in invalid context.")) 217 | (funcall cont (namestring (dump-to-tmpfile value *cmd-dir* (first body)))))) 218 | 219 | (defmethod (setf pastelyzer:artefact-note) (note (process finished-process)) 220 | (setf (pastelyzer:artefact-note (finished-process-artefact process)) 221 | note)) 222 | 223 | (defmethod pastelyzer:important-artefact-p ((process finished-process)) 224 | (pastelyzer:important-artefact-p (finished-process-artefact process))) 225 | 226 | (defmethod (setf pastelyzer:important-artefact-p) 227 | (flag (process finished-process)) 228 | (setf (pastelyzer:important-artefact-p (finished-process-artefact process)) 229 | flag)) 230 | 231 | (defclass process-filter (filter:filter) 232 | ()) 233 | 234 | (defmethod loader:apply-directive 235 | ((directive (eql (user-identifier "DEFINE-PROCESS-FILTER"))) (args list)) 236 | (destructuring-bind (name code &rest actions) 237 | args 238 | (filter:add-filter 'process-filter name code actions))) 239 | 240 | (defmethod sink:finish-sink ((proto cmd) (sink sink:sink)) 241 | (let ((stdout (sink:attribute-value sink :stdout)) 242 | (stderr (sink:attribute-value sink :stderr)) 243 | (action (sink:attribute-value sink :action)) 244 | (time (sink:attribute-value sink :time-limit))) 245 | (labels 246 | ((open-stream (method dir prefix) 247 | (ecase method 248 | (:collect-string 249 | (sys:open-tmpfile dir prefix :element-type 'character)) 250 | (:collect-bytes 251 | (sys:open-tmpfile dir prefix :element-type '(unsigned-byte 8))) 252 | (:discard 253 | nil))) 254 | (slurp-stream (method stream) 255 | (ecase method 256 | (:collect-string 257 | (file-position stream 0) 258 | (prog1 (pastelyzer.util:trim-space 259 | (read-stream-content-into-string stream) 260 | :both) 261 | (close stream))) 262 | (:collect-bytes 263 | (file-position stream 0) 264 | (prog1 (read-stream-content-into-byte-vector stream) 265 | (close stream))) 266 | (:discard 267 | nil))) 268 | (process (item actions stdout stderr) 269 | (let ((env (append 270 | (sink:attribute-value sink :environment) 271 | (cmd-sink-default-environment proto))) 272 | (stdin (sink:attribute-value-in-context sink :stdin item))) 273 | (sys:with-temporary-directory (tmpdir) 274 | (let* ((*cmd-dir* tmpdir) 275 | (cmd (sink:attribute-value-in-context sink :command item)) 276 | (out (open-stream stdout tmpdir "stdout-")) 277 | (err (open-stream stderr tmpdir "stderr-"))) 278 | (multiple-value-bind (status) 279 | (sys:run-program cmd 280 | :stdin (dump-to-tmpfile stdin tmpdir) 281 | :stdout out 282 | :stderr err 283 | :environment env 284 | :directory tmpdir 285 | :timeout time) 286 | (let ((proc-out (slurp-stream stdout out)) 287 | (proc-err (slurp-stream stderr err))) 288 | (dolist (action actions 42) 289 | (filter:apply-filter (filter:get-filter action) 290 | (make-instance 'finished-process 291 | :artefact item 292 | :stdout proc-out 293 | :stderr proc-err 294 | :status status) 295 | sink))))))))) 296 | (ecase (sink:attribute-value sink :target) 297 | (:document 298 | (process (sink:sink-document sink) action stdout stderr)) 299 | (:artefact 300 | (mapc (lambda (item) (process item action stdout stderr)) 301 | (sink:sink-artefacts sink))))))) 302 | -------------------------------------------------------------------------------- /src/config/context.lisp: -------------------------------------------------------------------------------- 1 | (defpackage #:pastelyzer.config.context 2 | (:use :common-lisp) 3 | (:import-from #:pastelyzer.log 4 | #:msg) 5 | (:import-from #:pastelyzer 6 | #:job 7 | #:job-subject 8 | #:register-artefact 9 | #:finish-job) 10 | (:import-from #:pastelyzer.config.sink 11 | #:configuration 12 | #:resolve-configuration 13 | #:get-prototype 14 | #:name-of 15 | #:sink 16 | #:collect-artefact 17 | #:add-artefact 18 | #:finish-sink) 19 | (:import-from #:pastelyzer.config.filter 20 | #:apply-filters 21 | #:discard-artefact) 22 | (:export #:configurable-job 23 | #:job-artefacts)) 24 | 25 | (in-package #:pastelyzer.config.context) 26 | 27 | (defclass configurable-job (job) 28 | ((sinks 29 | :initform '()) 30 | (artefacts 31 | :reader job-artefacts 32 | :type list 33 | :initform '()))) 34 | 35 | ;;; Maybe this function's name should include MATERIALIZE or ENSURE? 36 | (defmethod get-sink ((ctx configurable-job) (cfg symbol)) 37 | (get-sink ctx (resolve-configuration cfg))) 38 | 39 | (defmethod get-sink ((ctx configurable-job) (cfg configuration)) 40 | (let* ((name (name-of cfg)) 41 | (cons (assoc name (slot-value ctx 'sinks)))) 42 | (cond (cons 43 | (cdr cons)) 44 | (t 45 | (let ((sink (make-instance 'sink 46 | :configuration cfg 47 | :document (job-subject ctx)))) 48 | (setf (slot-value ctx 'sinks) 49 | (acons name sink (slot-value ctx 'sinks))) 50 | sink))))) 51 | 52 | (defmethod register-artefact ((job configurable-job) 53 | (artefact pastelyzer:string-artefact) 54 | (source t)) 55 | (let ((reason (catch 'discard-artefact 56 | (apply-filters artefact job) 57 | (push artefact (slot-value job 'artefacts)) 58 | (return-from register-artefact artefact)))) 59 | (signal 'pastelyzer:artefact-discarded 60 | :artefact artefact 61 | :reason reason))) 62 | 63 | (defmethod collect-artefact ((artefact t) 64 | (cfg symbol) 65 | (ctx configurable-job)) 66 | (collect-artefact artefact (resolve-configuration cfg) ctx)) 67 | 68 | (defmethod collect-artefact ((artefact t) 69 | (cfg configuration) 70 | (ctx configurable-job)) 71 | (let ((sink (get-sink ctx cfg))) 72 | (add-artefact sink artefact))) 73 | 74 | (defmethod finish-job ((job configurable-job)) 75 | (loop for (nil . sink) in (slot-value job 'sinks) 76 | do (finish-sink (get-prototype sink) sink))) 77 | 78 | (defmethod pastelyzer:process :after ((job configurable-job)) 79 | (finish-job job)) 80 | -------------------------------------------------------------------------------- /src/config/filter.lisp: -------------------------------------------------------------------------------- 1 | (defpackage #:pastelyzer.config.filter 2 | (:use :common-lisp) 3 | (:import-from #:alexandria 4 | #:when-let) 5 | (:import-from #:pastelyzer 6 | #:starts-with-subseq 7 | #:ends-with-subseq) 8 | (:import-from #:pastelyzer.log 9 | #:msg) 10 | (:import-from #:pastelyzer.config.package 11 | #:user-package) 12 | (:local-nicknames (#:usr #:pastelyzer.config.user) 13 | (#:sink #:pastelyzer.config.sink) 14 | (#:util #:pastelyzer.util)) 15 | (:export #:filter 16 | #:add-filter 17 | #:get-filter 18 | #:apply-filter 19 | #:apply-filters 20 | #:collect-into 21 | #:extract 22 | #:generate-filter-function 23 | #:make-function)) 24 | 25 | (in-package #:pastelyzer.config.filter) 26 | 27 | (defvar *filters* '() 28 | "An association list of defined filters.") 29 | 30 | (defclass filter () 31 | ((name 32 | :initarg :name 33 | :reader filter-name 34 | :type symbol) 35 | (function 36 | :initarg :function 37 | :reader filter-function 38 | :type (or symbol function)) 39 | (actions 40 | :initarg :actions 41 | :reader filter-actions 42 | :type list))) 43 | 44 | (defun get-filter (name) 45 | (or (cdr (assoc name *filters*)) 46 | (error "Unknown filter: ~S." name))) 47 | 48 | (defun register-filter (class name function actions) 49 | (check-type name symbol) 50 | (let ((filter (make-instance class 51 | :name name 52 | :function function 53 | :actions actions))) 54 | (when (assoc name *filters*) 55 | (warn "Redefining filter ~A" name) 56 | (setq *filters* (remove name *filters* :key #'car))) 57 | (setq *filters* (append *filters* (list (cons name filter))))) 58 | name) 59 | 60 | (defvar *variables* '()) 61 | 62 | (defun set-variable (name value) 63 | (setf *variables* (acons name value *variables*))) 64 | 65 | (defun get-variable (name) 66 | (let ((cons (assoc name *variables*))) 67 | (if cons 68 | (cdr cons) 69 | (error "Variable ~S is not set." name)))) 70 | 71 | (defun apply-filter (filter value ctx) 72 | (handler-case 73 | (let ((*variables* '())) 74 | (when (funcall (filter-function filter) value) 75 | (dolist (action (filter-actions filter)) 76 | (funcall action value ctx)))) 77 | (serious-condition (condition) 78 | (msg :error "While applying filter ~S to ~S: ~A" 79 | (filter-name filter) 80 | value 81 | condition)))) 82 | 83 | (defun apply-filters (value ctx &key (class (find-class 'filter))) 84 | (loop for (nil . filter) in *filters* 85 | when (eq class (class-of filter)) 86 | do (apply-filter filter value ctx)) 87 | value) 88 | 89 | (defgeneric parse-action (action &rest args) 90 | (:documentation "Parse a filter action form.")) 91 | 92 | (defmethod parse-action ((action (eql 'usr:collect-into)) &rest args) 93 | (check-type args (cons symbol null)) 94 | (let ((cfg (sink:resolve-configuration (first args)))) 95 | (lambda (artefact ctx) 96 | (sink:collect-artefact artefact cfg ctx)))) 97 | 98 | (defmethod parse-action ((action (eql 'usr:discard)) &rest args) 99 | (check-type args (or null (cons string null))) 100 | (let ((reason (first args))) 101 | (lambda (artefact ctx) 102 | (declare (ignore artefact ctx)) 103 | (throw 'discard-artefact reason)))) 104 | 105 | (defmethod parse-action ((action (eql 'usr:set-important)) &rest args) 106 | (check-type args null) 107 | (lambda (artefact ctx) 108 | (declare (ignore ctx)) 109 | (msg :debug "Marking ~S as important") 110 | (setf (pastelyzer:important-artefact-p artefact) t))) 111 | 112 | (defmethod parse-action ((action (eql 'usr:set-note)) &rest args) 113 | (let ((datum (first args))) 114 | (etypecase datum 115 | (string 116 | (lambda (artefact ctx) 117 | (declare (ignore ctx)) 118 | (msg :debug "Setting note of ~S to ~S" artefact datum) 119 | (setf (pastelyzer:artefact-note artefact) datum))) 120 | (symbol 121 | (lambda (artefact ctx) 122 | (declare (ignore ctx)) 123 | (let ((note (get-variable datum))) 124 | (msg :debug "Setting note of ~S to value of ~A: ~S" 125 | artefact datum note) 126 | (setf (pastelyzer:artefact-note artefact) note))))))) 127 | 128 | (defmacro make-function (op (&rest args) &body body) 129 | (declare (ignorable op)) 130 | `(lambda (&rest .args.) 131 | ;; (format *trace-output* "~&(~S ~{~S~^ ~})~%" ',op .args.) 132 | (destructuring-bind ,args 133 | .args. 134 | ,@body))) 135 | 136 | (defgeneric generate-filter-function (operator &rest body)) 137 | 138 | (defmethod generate-filter-function ((operator t) &rest body) 139 | (declare (ignore body)) 140 | (error "Unknown operator: ~S" operator)) 141 | 142 | (defmethod generate-filter-function ((operator (eql 'usr:^)) &rest body) 143 | (check-type body (or null (cons symbol))) 144 | (let ((name (or (first body) 'usr:^))) 145 | (make-function ^ (value cont) 146 | (set-variable name value) 147 | (funcall cont value)))) 148 | 149 | (defmethod generate-filter-function ((operator (eql 'usr:and)) &rest body) 150 | (if (endp body) 151 | (make-function and (value cont) 152 | (declare (ignorable value)) 153 | (funcall cont value)) 154 | (let ((head (apply #'generate-filter-function (first body))) 155 | (tail (apply #'generate-filter-function operator (rest body)))) 156 | (declare (type function head tail)) 157 | (make-function and (value cont) 158 | (funcall head value 159 | ;; XXX Allocates a closure at runtime. 160 | (lambda (result) 161 | (if result 162 | (funcall tail value cont) 163 | (funcall cont nil)))))))) 164 | 165 | (defmethod generate-filter-function ((operator (eql 'usr:or)) &rest body) 166 | (if (endp body) 167 | (make-function or (value cont) 168 | (declare (ignorable value)) 169 | (funcall cont nil)) 170 | (let ((head (apply #'generate-filter-function (first body))) 171 | (tail (apply #'generate-filter-function operator (rest body)))) 172 | (declare (type function head tail)) 173 | (make-function or (value cont) 174 | (funcall head value 175 | ;; XXX Allocates a closure at runtime. 176 | (lambda (result) 177 | (if result 178 | (funcall cont result) 179 | (funcall tail value cont)))))))) 180 | 181 | (defmethod generate-filter-function ((operator (eql 'usr:not)) &rest body) 182 | (check-type body (cons list null)) 183 | (let ((inner (apply #'generate-filter-function (first body)))) 184 | (declare (type function inner)) 185 | (make-function not (value cont) 186 | (funcall inner value 187 | ;; XXX Allocates a closure at runtime. 188 | (lambda (result) 189 | (funcall cont (if result nil t))))))) 190 | 191 | (defmethod equals ((left number) (right number)) 192 | (= left right)) 193 | 194 | (defmethod lessp ((left number) (right number)) 195 | (< left right)) 196 | 197 | (defmethod greaterp ((left number) (right number)) 198 | (> left right)) 199 | 200 | (defmethod equals ((left string) (right string)) 201 | (string= left right)) 202 | 203 | (defmethod equals ((left string) (right pastelyzer:string-artefact)) 204 | (string= left (pastelyzer:artefact-source right))) 205 | 206 | (defmethod lessp ((left string) (right string)) 207 | (string< left right)) 208 | 209 | (defmethod lessp ((left string) (right pastelyzer:string-artefact)) 210 | (string< left (pastelyzer:artefact-source right))) 211 | 212 | (defmethod greaterp ((left string) (right string)) 213 | (string> left right)) 214 | 215 | (defmethod greaterp ((left string) (right pastelyzer:string-artefact)) 216 | (string> left (pastelyzer:artefact-source right))) 217 | 218 | (defmethod generate-filter-function ((operator (eql 'usr:=)) &rest body) 219 | (check-type body (cons (or number string) null)) 220 | (let ((datum (first body))) 221 | (make-function = (value cont) 222 | (funcall cont (equals datum value))))) 223 | 224 | (defmethod generate-filter-function ((operator (eql 'usr:>)) &rest body) 225 | (check-type body (cons number null)) 226 | (let ((number (first body))) 227 | (make-function > (value cont) 228 | (funcall cont (greaterp value number))))) 229 | 230 | (defmethod generate-filter-function ((operator (eql 'usr:<)) &rest body) 231 | (check-type body (cons number null)) 232 | (let ((number (first body))) 233 | (make-function < (value cont) 234 | (funcall cont (lessp value number))))) 235 | 236 | (defmethod generate-filter-function ((operator (eql 'usr:type?)) &rest body) 237 | (check-type body (cons symbol null)) 238 | (let ((type (first body))) 239 | (make-function type? (value cont) 240 | (funcall cont (typep value type))))) 241 | 242 | (defmethod generate-filter-function ((operator (eql 'usr:exact-type?)) 243 | &rest body) 244 | (check-type body (cons symbol null)) 245 | (let ((type (first body))) 246 | (make-function type? (value cont) 247 | (funcall cont (eq type (class-name (class-of value))))))) 248 | 249 | (defgeneric extract (field object) 250 | (:documentation "Used by EXTRACT filter expression.")) 251 | 252 | (defmethod extract ((field t) (object t)) 253 | (error "Unknown extractor ~S for ~S" field object)) 254 | 255 | (defmethod generate-filter-function ((operator (eql 'usr:extract)) &rest body) 256 | (check-type body (cons symbol null)) 257 | (let ((accessor (find-symbol (symbol-name (first body)) (user-package)))) 258 | (make-function extract (value cont) 259 | (funcall cont (extract accessor value))))) 260 | 261 | (defmethod generate-filter-function ((operator (eql 'usr:->)) &rest body) 262 | (if (endp body) 263 | (make-function -> (value cont) 264 | (funcall cont value)) 265 | (let ((head (apply #'generate-filter-function (first body))) 266 | (tail (apply #'generate-filter-function operator (rest body)))) 267 | (declare (type function head tail)) 268 | ;; XXX Allocates a closure at runtime. 269 | (make-function -> (value cont) 270 | (funcall head value 271 | (lambda (result) 272 | (funcall tail result cont))))))) 273 | 274 | (defmethod value-length ((value sequence)) 275 | (length value)) 276 | 277 | (defmethod value-length ((value pastelyzer:fragment)) 278 | (length (pastelyzer:fragment-body value))) 279 | 280 | (defmethod value-length ((value pastelyzer:artefact)) 281 | (multiple-value-bind (start end) 282 | (pastelyzer:artefact-source-seq-bounds value) 283 | (- end start))) 284 | 285 | (defmethod generate-filter-function ((operator (eql 'usr:length)) &rest body) 286 | (check-type body null) 287 | (make-function length (value cont) 288 | (funcall cont (value-length value)))) 289 | 290 | (defmethod starts-with? ((prefix sequence) (value sequence)) 291 | (starts-with-subseq prefix value)) 292 | 293 | (defmethod starts-with? ((prefix sequence) (value pastelyzer:fragment)) 294 | (starts-with? prefix (pastelyzer:fragment-body value))) 295 | 296 | (defmethod starts-with? ((prefix sequence) (value pastelyzer:artefact)) 297 | (multiple-value-bind (start end) 298 | (pastelyzer:artefact-source-seq-bounds value) 299 | (starts-with-subseq prefix (pastelyzer:artefact-source-seq value) 300 | :start2 start 301 | :end2 end))) 302 | 303 | (defmethod generate-filter-function ((operator (eql 'usr:starts-with?)) 304 | &rest body) 305 | (check-type body (cons vector null)) 306 | (let ((prefix (first body))) 307 | (make-function starts-with? (sequence cont) 308 | (funcall cont (starts-with? prefix sequence))))) 309 | 310 | (defmethod ends-with? ((suffix sequence) (value sequence)) 311 | (ends-with-subseq suffix value)) 312 | 313 | (defmethod ends-with? ((suffix sequence) (value pastelyzer:fragment)) 314 | (ends-with? suffix (pastelyzer:fragment-body value))) 315 | 316 | (defmethod ends-with? ((suffix sequence) (value pastelyzer:artefact)) 317 | (multiple-value-bind (start end) 318 | (pastelyzer:artefact-source-seq-bounds value) 319 | (ends-with-subseq suffix (pastelyzer:artefact-source-seq value) 320 | :start2 start 321 | :end2 end))) 322 | 323 | (defmethod generate-filter-function ((operator (eql 'usr:ends-with?)) 324 | &rest body) 325 | (check-type body (cons vector null)) 326 | (let ((suffix (first body))) 327 | (make-function ends-with? (sequence cont) 328 | (funcall cont (ends-with? suffix sequence))))) 329 | 330 | (defmethod contains? ((needle sequence) (value sequence)) 331 | (search needle value)) 332 | 333 | (defmethod contains? ((needle sequence) (value pastelyzer:fragment)) 334 | (contains? needle (pastelyzer:fragment-body value))) 335 | 336 | (defmethod contains? ((needle sequence) (value pastelyzer:artefact)) 337 | (multiple-value-bind (start end) 338 | (pastelyzer:artefact-source-seq-bounds value) 339 | (search needle (pastelyzer:artefact-source-seq value) 340 | :start2 start 341 | :end2 end))) 342 | 343 | (defmethod generate-filter-function ((operator (eql 'usr:contains?)) 344 | &rest body) 345 | (check-type body (cons vector null)) 346 | (let ((needle (first body))) 347 | (make-function contains? (sequence cont) 348 | (funcall cont (if (contains? needle sequence) t nil))))) 349 | 350 | (defmethod mixed-case-p ((string string)) 351 | (util:mixed-case-p string)) 352 | 353 | (defmethod mixed-case-p ((artefact pastelyzer:string-artefact)) 354 | (util:mixed-case-p (pastelyzer:artefact-source-seq artefact) 355 | (pastelyzer:artefact-source-seq-start artefact) 356 | (pastelyzer:artefact-source-seq-end artefact))) 357 | 358 | (defmethod generate-filter-function ((operator (eql 'usr:mixed-case?)) 359 | &rest body) 360 | (check-type body null) 361 | (make-function mixed-case? (thing cont) 362 | (funcall cont (if (mixed-case-p thing) t nil)))) 363 | 364 | (defun parse-filter (name form) 365 | (msg :debug "Filter ~S: ~S" name form) 366 | (if form 367 | (let ((test (apply #'generate-filter-function form))) 368 | (lambda (value) 369 | (msg :debug "Applying filter ~S to ~S" name value) 370 | (funcall test value 371 | (lambda (result) 372 | (msg :debug "~S ~S => ~:[fail~;success~]" 373 | name value result) 374 | result)))) 375 | #'identity)) 376 | 377 | (defun add-filter (class name code actions) 378 | (register-filter class name 379 | (parse-filter name code) 380 | (mapcar (lambda (action) 381 | (apply #'parse-action 382 | (first action) 383 | (rest action))) 384 | actions))) 385 | 386 | ;;; Extractors. 387 | 388 | (defmethod extract ((field (eql 'usr::origin)) (object t)) 389 | (pastelyzer:paste-source object)) 390 | 391 | (defmethod extract ((field (eql 'usr::source-url)) (object t)) 392 | (warn "SOURCE-URL extractor is deprecated, use ORIGIN instead.") 393 | (extract 'usr::origin object)) 394 | 395 | (defmethod extract ((field (eql 'usr::local-url)) (object t)) 396 | (pastelyzer::external-url-to object)) 397 | 398 | (defmethod extract ((field (eql 'usr::remote-url)) (object t)) 399 | ;; Should cache this result. 400 | (multiple-value-bind (source url raw-url) 401 | (pastelyzer::paste-source object) 402 | (declare (ignore source raw-url)) 403 | (when url 404 | (puri:render-uri url nil)))) 405 | 406 | (defmethod extract ((field (eql 'usr::remote-raw-url)) (object t)) 407 | ;; Should use the cached result. 408 | (multiple-value-bind (source url raw-url) 409 | (pastelyzer::paste-source object) 410 | (declare (ignore source url)) 411 | (when raw-url 412 | (puri:render-uri raw-url nil)))) 413 | 414 | (defmethod extract ((field (eql 'usr::artefact-descriptions)) (sink sink:sink)) 415 | (with-output-to-string (out) 416 | (let ((groups (sink:group-artefacts sink))) 417 | (loop for group in groups 418 | for (class unique important duplicate-count) = group 419 | do (terpri out) 420 | (pastelyzer::summarize-artefact-group out group) 421 | (terpri out) 422 | (terpri out) 423 | (loop for bag being each hash-value in unique 424 | for artefact = (first bag) 425 | for string = (pastelyzer:artefact-description artefact) 426 | do (write-string string out) 427 | (terpri out)))))) 428 | 429 | (defmethod extract ((field (eql 'usr::artefact-summary-by-class)) 430 | (sink sink:sink)) 431 | (let ((groups (sink:group-artefacts sink))) 432 | ;; TODO: Waiting for the artefact summarization refactoring. 433 | (pastelyzer::summarize-artefacts groups :text))) 434 | 435 | (defmethod extract ((field (eql 'usr::digits)) 436 | (artefact pastelyzer:bank-card-number)) 437 | (pastelyzer:bank-card-number-digits artefact)) 438 | 439 | (defmethod extract ((field (eql 'usr::note)) 440 | (artefact pastelyzer:artefact)) 441 | (pastelyzer:artefact-note artefact)) 442 | 443 | (defmethod extract ((field (eql 'usr::important)) 444 | (artefact pastelyzer:artefact)) 445 | (pastelyzer:important-artefact-p artefact)) 446 | 447 | (defmethod extract ((field (eql 'usr::source-string)) 448 | (artefact pastelyzer:artefact)) 449 | (pastelyzer:artefact-source artefact)) 450 | 451 | (defmethod extract ((field (eql 'usr::source-context)) 452 | (artefact pastelyzer:artefact)) 453 | (pastelyzer:artefact-source-seq artefact)) 454 | 455 | (defmethod extract ((field (eql 'usr::context-before)) 456 | (artefact pastelyzer:artefact)) 457 | (pastelyzer::artefact-context-before artefact)) 458 | 459 | (defmethod extract ((field (eql 'usr::context-after)) 460 | (artefact pastelyzer:artefact)) 461 | (pastelyzer::artefact-context-after artefact)) 462 | 463 | (defmethod extract ((field (eql 'usr::bytes)) 464 | (artefact pastelyzer:artefact)) 465 | (pastelyzer:embedded-binary-bytes artefact)) 466 | -------------------------------------------------------------------------------- /src/config/loader.lisp: -------------------------------------------------------------------------------- 1 | (defpackage #:pastelyzer.config.loader 2 | (:use #:common-lisp) 3 | (:import-from #:pastelyzer.config.sink 4 | #:make-configuration 5 | #:register-configuration) 6 | (:import-from #:pastelyzer.config.filter 7 | #:add-filter) 8 | (:import-from #:pastelyzer.log 9 | #:msg) 10 | (:local-nicknames (#:usr #:pastelyzer.config.user) 11 | (#:sink #:pastelyzer.config.sink) 12 | (#:filter #:pastelyzer.config.filter)) 13 | (:export #:load-configuration 14 | #:apply-directive)) 15 | 16 | (in-package #:pastelyzer.config.loader) 17 | 18 | (defun read-hex-bytes (stream char) 19 | (assert (char= #\[ char)) 20 | (let* ((*read-base* 16) 21 | (bytes (read-delimited-list #\] stream t))) 22 | (assert (every (lambda (value) 23 | (typep value '(unsigned-byte 8))) 24 | bytes)) 25 | (make-array (length bytes) 26 | :element-type '(unsigned-byte 8) 27 | :initial-contents bytes))) 28 | 29 | (defun disabled-sharp-dot-reader (stream char n) 30 | (declare (ignore char n)) 31 | (warn "Ignoring sharp-dot form: ~S" (read stream t nil t))) 32 | 33 | (defun create-user-readtable () 34 | (let ((readtable (copy-readtable nil))) 35 | (set-macro-character #\[ #'read-hex-bytes nil readtable) 36 | (set-macro-character #\] (get-macro-character #\)) nil readtable) 37 | (set-dispatch-macro-character #\# #\. #'disabled-sharp-dot-reader readtable) 38 | readtable)) 39 | 40 | (defun user-readtable () 41 | (create-user-readtable)) 42 | 43 | (defun maybe-continue (condition) 44 | (let ((continue (find-restart 'skip condition))) 45 | (when continue 46 | (invoke-restart continue)))) 47 | 48 | (defmethod apply-directive ((directive t) (args list)) 49 | (error "Invalid configuration form: (~S~{ ~S~})" directive args)) 50 | 51 | (defmethod apply-directive ((directive (eql 'usr:define-sink)) 52 | (args list)) 53 | (destructuring-bind (name (parent) &rest attributes) 54 | args 55 | (msg :debug "Defining sink ~S" name) 56 | (register-configuration 57 | (make-configuration name parent attributes)))) 58 | 59 | (defmethod apply-directive ((directive (eql 'usr:define-artefact-filter)) 60 | (args list)) 61 | (destructuring-bind (name code &rest actions) 62 | args 63 | (msg :debug "Defining artefact filter ~S" name) 64 | (add-filter 'filter:filter name code actions))) 65 | 66 | (defmethod load-configuration ((source pathname)) 67 | (with-open-file (stream source :direction :input) 68 | (load-configuration stream))) 69 | 70 | (defun skip-to-char (stream char) 71 | (loop for next = (read-char stream) 72 | until (char= char next) 73 | finally (unread-char next stream))) 74 | 75 | (defun read-form (stream) 76 | "Reads a form from STREAM and returns two values: the form read (or 77 | EOF-VALUE if end of file was encountered and EOF-ERROR-P is true), 78 | and position of the form in the STREAM." 79 | (loop for char = (read-char stream) 80 | do (case char 81 | (#\; 82 | (skip-to-char stream #\newline)) 83 | (#\( 84 | (unread-char char stream) 85 | (let ((position (file-position stream))) 86 | (return (values (read stream) 87 | position))))))) 88 | 89 | (defun read-configuration (source) 90 | (let ((*package* (pastelyzer.config.package:user-package)) 91 | (*read-eval* nil) 92 | (*readtable* (user-readtable))) 93 | (handler-case 94 | (loop with eof = '#:eof 95 | for (form position) = (multiple-value-list (read-form source)) 96 | until (eq form eof) 97 | do (with-simple-restart 98 | (skip "Ignore this directive (~A)." (first form)) 99 | (handler-bind 100 | ((error 101 | (lambda (condition) 102 | (msg :error "Character ~A: ~A" position condition) 103 | ;; XXX: While debugging. 104 | (maybe-continue condition)))) 105 | (apply-directive (first form) (rest form))))) 106 | (end-of-file () 107 | nil)))) 108 | 109 | (defmethod load-configuration ((source stream)) 110 | ;; This looks like a hack because it is: instead of clobbering the 111 | ;; existing configuration we load configuration in a "clean" 112 | ;; environment, and if there are no errors install the new 113 | ;; configuration. This is not needed yet, but will be once we allow 114 | ;; to re-load configuration at run-time. 115 | (multiple-value-setq (sink::*known-configurations* filter::*filters*) 116 | (let ((sink::*known-configurations* '()) 117 | (filter::*filters* '())) 118 | (read-configuration source) 119 | (values sink::*known-configurations* 120 | filter::*filters*)))) 121 | -------------------------------------------------------------------------------- /src/config/package.lisp: -------------------------------------------------------------------------------- 1 | (defpackage #:pastelyzer.config.user 2 | (:use) 3 | (:import-from #:pastelyzer 4 | #:artefact 5 | #:binary-artefact 6 | #:compressed-blob 7 | #:encoded-string 8 | #:string-artefact 9 | 10 | #:bank-card-number 11 | #:credential 12 | #:domain 13 | #:onion 14 | #:email 15 | #:embedded-binary 16 | #:base64-blob 17 | #:binary-blob 18 | #:hex-blob 19 | #:ip-address 20 | #:ip-service 21 | #:resolved-ip-address 22 | #:m3u-entry 23 | #:uri 24 | #:windows-internal) 25 | (:export #:define-sink 26 | #:define-artefact-filter 27 | #:collect-into 28 | #:discard 29 | #:set-important 30 | #:set-note 31 | #:fmt 32 | #:env 33 | #:file-contents 34 | #:yes 35 | #:no 36 | #:true 37 | #:false 38 | #:digits 39 | #:note 40 | #:important 41 | #:source-string 42 | #:source-context 43 | #:context-before 44 | #:context-after 45 | #:bytes 46 | 47 | #:or 48 | #:and 49 | #:not 50 | #:= 51 | #:> 52 | #:< 53 | #:type? 54 | #:exact-type? 55 | #:extract 56 | #:-> 57 | #:length 58 | #:starts-with? 59 | #:ends-with? 60 | #:contains? 61 | #:mixed-case? 62 | #:^ 63 | 64 | ;; Sets. 65 | #:define-set 66 | #:strings 67 | #:cc-bins 68 | #:super-domains 69 | #:ipv4-networks 70 | #:member? 71 | )) 72 | 73 | (defpackage #:pastelyzer.config.package 74 | (:use :common-lisp) 75 | (:export #:user-package 76 | #:user-identifier 77 | #:user-identifier-p)) 78 | 79 | (in-package #:pastelyzer.config.package) 80 | 81 | (defun user-package () 82 | (find-package "PASTELYZER.CONFIG.USER")) 83 | 84 | (defun user-identifier (name) 85 | (let* ((package (user-package)) 86 | (symbol (etypecase name 87 | (symbol 88 | (import name package) 89 | name) 90 | (string 91 | (intern name package))))) 92 | (export symbol package) 93 | symbol)) 94 | 95 | (defun user-identifier-p (thing) 96 | (and (symbolp thing) 97 | (eq thing (find-symbol (symbol-name thing) (user-package))))) 98 | -------------------------------------------------------------------------------- /src/config/sets.lisp: -------------------------------------------------------------------------------- 1 | (defpackage #:pastelyzer.config.sets 2 | (:use :common-lisp) 3 | (:import-from #:pastelyzer.log 4 | #:msg) 5 | (:import-from #:split-sequence 6 | #:split-sequence) 7 | (:import-from #:pastelyzer.config.filter 8 | #:make-function) 9 | (:local-nicknames (#:util #:pastelyzer.util) 10 | (#:usr #:pastelyzer.config.user) 11 | (#:filter #:pastelyzer.config.filter) 12 | (#:loader #:pastelyzer.config.loader))) 13 | 14 | (in-package #:pastelyzer.config.sets) 15 | 16 | (defun cfg-bool (value) 17 | (ecase value 18 | ((cl:nil usr:no usr:false) nil) 19 | ((cl:t usr:yes usr:true) t))) 20 | 21 | (defgeneric add-entry (set key &optional value)) 22 | (defgeneric load-set (type source &key &allow-other-keys)) 23 | (defgeneric contains? (datum set)) 24 | 25 | (defclass lookup-table () 26 | ((source 27 | :initarg :source 28 | :reader lookup-table-source 29 | :type (or null pathname)) 30 | (entries 31 | :initarg :entries 32 | :reader lookup-table-entries))) 33 | 34 | (defmethod contains? ((artefact pastelyzer:string-artefact) (set lookup-table)) 35 | (contains? (pastelyzer:artefact-source artefact) set)) 36 | 37 | (defmethod contains? :around ((artefact pastelyzer:artefact) (set lookup-table)) 38 | (multiple-value-bind (found note) 39 | (call-next-method) 40 | (when note 41 | (setf (pastelyzer:artefact-note artefact) note)) 42 | found)) 43 | 44 | (defmethod populate-set ((set lookup-table) (list cons) &rest keys) 45 | (when keys 46 | (warn "Options not supported for ~S: ~S" set keys)) 47 | (loop for entry in list 48 | do (multiple-value-bind (value comment) 49 | (etypecase entry 50 | (cons 51 | (values-list entry)) 52 | (string 53 | (values entry nil))) 54 | (add-entry set value comment))) 55 | set) 56 | 57 | (defmethod populate-set ((set lookup-table) (path pathname) 58 | &key (comment-start "#") 59 | (attach-comments nil) 60 | (trim-space t)) 61 | (with-open-file (in path) 62 | (loop with note = nil 63 | for line = (read-line in nil nil) 64 | while line 65 | do (when trim-space 66 | (setq line (util:trim-space line :both))) 67 | (if (zerop (length line)) 68 | (setq note nil) 69 | (let ((mm (mismatch comment-start line))) 70 | (cond ((null mm) 71 | ;; Empty comment line. 72 | (setq note nil)) 73 | ((zerop mm) 74 | ;; Not a comment line. 75 | (with-simple-restart 76 | (continue "Ignore invalid entry.") 77 | (add-entry set line note))) 78 | ((cfg-bool attach-comments) 79 | (let ((start (position-if-not #'util:whitespace-char-p 80 | line 81 | :start mm))) 82 | (setq note (if start 83 | (subseq line start) 84 | nil))))))))) 85 | set) 86 | 87 | (defclass string-set (lookup-table) 88 | ((entries 89 | ;; A simple hash table mapping strings to user supplied values. 90 | :type hash-table 91 | :initform (make-hash-table :test 'equal)))) 92 | 93 | (defmethod add-entry ((set string-set) (string string) &optional note) 94 | (setf (gethash string (lookup-table-entries set)) note)) 95 | 96 | (defmethod load-set ((type (eql 'usr:strings)) (list cons) 97 | &rest keys) 98 | (apply #'populate-set 99 | (make-instance 'string-set :source nil) 100 | list 101 | keys)) 102 | 103 | (defmethod load-set ((type (eql 'usr:strings)) (path pathname) &rest keys) 104 | (apply #'populate-set 105 | (make-instance 'string-set :source path) 106 | path 107 | keys)) 108 | 109 | (defmethod contains? ((string string) (set string-set)) 110 | (multiple-value-bind (value found) 111 | (gethash string (lookup-table-entries set)) 112 | (values found value))) 113 | 114 | (defclass cc-bin-set (lookup-table) 115 | ((entries 116 | ;; A two-level alist. The first level keys are card number 117 | ;; lengths. The second level maps prefix (BIN) length to a 118 | ;; hash-table. In this table BINs (strings of digits) are mapped 119 | ;; to user supplied values (notes). 120 | :reader cc-bin-set-bins 121 | :type list 122 | :initform '()))) 123 | 124 | (defmethod add-entry ((set cc-bin-set) (pattern string) &optional note) 125 | (let* ((pattern (remove-if #'util:whitespace-char-p pattern)) 126 | (length (length pattern)) 127 | (end (position-if-not #'digit-char-p pattern))) 128 | (when (zerop end) 129 | (error "Invalid bin pattern: ~S" pattern)) 130 | (with-slots (entries) 131 | set 132 | (let ((cell (assoc length entries))) 133 | (unless cell 134 | (setq cell (cons length '())) 135 | (push cell entries)) 136 | (let* ((prefix (subseq pattern 0 end)) 137 | (plength (length prefix)) 138 | (table (cdr (assoc plength (cdr cell))))) 139 | (unless table 140 | (setq table (make-hash-table :test 'equal)) 141 | (setf (cdr cell) (acons plength table (cdr cell)))) 142 | (multiple-value-bind (value found) 143 | (gethash prefix table) 144 | (when found 145 | (warn "BIN ~A already in set~@[ (~A)~]" pattern value))) 146 | (msg :debug "Adding CC bin ~A/~D~@[: ~A~]" prefix length note) 147 | (setf (gethash prefix table) note)))))) 148 | 149 | (defmethod load-set ((type (eql 'usr:cc-bins)) (list cons) &rest keys) 150 | (apply #'populate-set 151 | (make-instance 'cc-bin-set :source nil) 152 | list 153 | keys)) 154 | 155 | (defmethod load-set ((type (eql 'usr:cc-bins)) (path pathname) &rest keys) 156 | (msg :info "Reading important CC bins from ~A" path) 157 | (let* ((result (apply #'populate-set 158 | (make-instance 'cc-bin-set :source path) 159 | path 160 | keys)) 161 | (count (loop for (nil . bins) in (lookup-table-entries result) 162 | sum (loop for (nil . table) in bins 163 | sum (hash-table-count table))))) 164 | (msg :info "Read ~D bin~:P from ~A" count path) 165 | result)) 166 | 167 | (defmethod contains? ((digits string) (set cc-bin-set)) 168 | (with-slots (entries) 169 | set 170 | (loop with bins = (cdr (assoc (length digits) entries)) 171 | for (plength . table) in bins 172 | do (multiple-value-bind (note found) 173 | (gethash (subseq digits 0 plength) table) 174 | (when found 175 | (return (values found note))))))) 176 | 177 | (defclass ipv4-network-set (lookup-table) 178 | ((entries 179 | ;; An alist mapping prefix-length to a hash-table. Hash table key 180 | ;; is a prefix, and value is the user supplied note (can be NIL). 181 | ;; 182 | ;; Theoretically the networks should not overlap, but it seems a 183 | ;; good idea to allow it so that known finer-grained networks can 184 | ;; be reported, and the bigger prefixes left as a fall-back. 185 | ;; Entries are be stored (and therefore also checked) with the 186 | ;; longest prefixes first. 187 | :type list 188 | :initform '()))) 189 | 190 | (defmethod add-entry ((set ipv4-network-set) (network ip:ipv4-network) 191 | &optional note) 192 | (with-slots (entries) 193 | set 194 | (let* ((provided-bits (ip:ipv4-network-bits network)) 195 | (prefix (ip:ipv4-network-prefix network)) 196 | (bits (mask-field (byte prefix (- 32 prefix)) provided-bits)) 197 | (cell (assoc prefix entries)) 198 | (table (if cell 199 | (cdr cell) 200 | (let ((table (make-hash-table))) 201 | (setf entries 202 | (sort (acons prefix table entries) #'> 203 | :key #'car)) 204 | table)))) 205 | (multiple-value-bind (value found) 206 | (gethash bits table) 207 | (declare (ignore value)) 208 | (when found 209 | (warn "Network ~S already present in ~S" network set))) 210 | (setf (gethash bits table) note) 211 | set))) 212 | 213 | (defmethod add-entry ((set ipv4-network-set) (network string) &optional note) 214 | (add-entry set (ip:parse-address network :network) note)) 215 | 216 | (defmethod load-set ((type (eql 'usr:ipv4-networks)) (list cons) 217 | &rest keys) 218 | (apply #'populate-set 219 | (make-instance 'ipv4-network-set :source nil) 220 | list 221 | keys)) 222 | 223 | (defmethod load-set ((type (eql 'usr:ipv4-networks)) (path pathname) &rest keys) 224 | (apply #'populate-set 225 | (make-instance 'ipv4-network-set :source path) 226 | path 227 | keys)) 228 | 229 | (defmethod contains? ((address ip:ip-address) (set ipv4-network-set)) 230 | (loop with entries = (slot-value set 'entries) 231 | with address-bits = (ip:ipv4-address-bits address) 232 | for (prefix . table) of-type ((integer 1 32) . hash-table) in entries 233 | do (let ((bits (mask-field (byte prefix (- 32 prefix)) address-bits))) 234 | (multiple-value-bind (value found) 235 | (gethash bits table) 236 | (when found 237 | (return (values t value))))))) 238 | 239 | (defmethod contains? ((artefact pastelyzer:ip-address) (set ipv4-network-set)) 240 | (contains? (pastelyzer::artefact-address artefact) set)) 241 | 242 | (defclass super-domain-set (lookup-table) 243 | ((entries 244 | ;; A tree of hash tables. Key is a domain label. Value is either 245 | ;; another hash table, or a value to return for the entry (a note 246 | ;; or NIL). 247 | :initarg :entries 248 | :reader super-domain-set-entries 249 | :type hash-table 250 | :initform (make-hash-table :test 'equalp)))) 251 | 252 | (defun hashtree-add-path (tree path &optional value) 253 | (check-type tree hash-table) 254 | (check-type path list) 255 | (if (null (cdr path)) 256 | (setf (gethash (car path) tree) value) 257 | (multiple-value-bind (table found) 258 | (gethash (car path) tree) 259 | (if found 260 | (if (typep table 'hash-table) 261 | (hashtree-add-path table (cdr path) value) 262 | (warn "Entry already present for ~S (~A); ignoring ~S (~A)" 263 | (car path) table (cdr path) value)) 264 | (let ((new (setf (gethash (car path) tree) 265 | (make-hash-table :test 'equalp)))) 266 | (hashtree-add-path new (cdr path) value)))))) 267 | 268 | (defun hashtree-present-p (tree path) 269 | (multiple-value-bind (value found) 270 | (gethash (car path) tree) 271 | (if found 272 | (cond ((not (typep value 'hash-table)) 273 | ;; Terminating table — all sub-searches match. 274 | (values t value)) 275 | ((null (cdr path)) 276 | ;; Path too short. 277 | nil) 278 | (t 279 | (hashtree-present-p value (cdr path)))) 280 | nil))) 281 | 282 | (defmethod contains? ((domain string) (set super-domain-set)) 283 | (hashtree-present-p (lookup-table-entries set) 284 | (reverse (split-sequence #\. domain)))) 285 | 286 | (defmethod add-entry ((set super-domain-set) (value string) &optional note) 287 | (with-slots (entries) 288 | set 289 | (let ((labels (split-sequence #\. value :remove-empty-subseqs t))) 290 | (hashtree-add-path entries (reverse labels) note)))) 291 | 292 | (defmethod load-set ((type (eql 'usr:super-domains)) (list cons) &rest keys) 293 | (apply #'populate-set 294 | (make-instance 'super-domain-set 295 | :source nil) 296 | list 297 | keys)) 298 | 299 | (defmethod load-set ((type (eql 'usr:super-domains)) (path pathname) &rest keys) 300 | (apply #'populate-set 301 | (make-instance 'super-domain-set :source path) 302 | path 303 | keys)) 304 | 305 | (defvar *known-sets* '()) 306 | 307 | (defun resolve-set (name) 308 | (cdr (assoc name *known-sets*))) 309 | 310 | (defun register-set (name datum) 311 | (let ((cell (assoc name *known-sets*))) 312 | (if cell 313 | (setf (cdr cell) datum) 314 | (setq *known-sets* (acons name datum *known-sets*))))) 315 | 316 | (defmethod loader::apply-directive ((directive (eql 'usr:define-set)) 317 | (args list)) 318 | (destructuring-bind 319 | (name (type) &rest keys 320 | &key file entries auto-reload comment-start attach-comments) 321 | args 322 | (declare (ignore auto-reload comment-start attach-comments)) 323 | (msg :debug "Defining ~(~S~) set ~S" type name) 324 | (when (and file entries) 325 | (error ":FILE and :ENTRIES are mutually exclusive")) 326 | (cond (entries 327 | (register-set name 328 | (apply #'load-set type entries 329 | (alexandria:remove-from-plist keys :entries)))) 330 | (file 331 | (unless (pathnamep file) 332 | (setq file (parse-namestring file))) 333 | (register-set name 334 | (apply #'load-set type file 335 | (alexandria:remove-from-plist keys :file)))) 336 | (t 337 | (error "Need either :FILE or :ENTRIES"))))) 338 | 339 | (defmethod filter:generate-filter-function ((operator (eql 'usr:member?)) 340 | &rest body) 341 | (check-type body (cons symbol null)) 342 | (let* ((identifier (first body)) 343 | (set (resolve-set identifier))) 344 | (make-function member? (artefact cont) 345 | (funcall cont (contains? artefact set))))) 346 | -------------------------------------------------------------------------------- /src/config/sink.lisp: -------------------------------------------------------------------------------- 1 | (defpackage #:pastelyzer.config.sink 2 | (:use :common-lisp) 3 | (:import-from #:alexandria 4 | #:when-let) 5 | (:import-from #:pastelyzer.log 6 | #:msg) 7 | (:import-from #:pastelyzer.config.package 8 | #:user-identifier-p) 9 | (:local-nicknames (#:usr #:pastelyzer.config.user)) 10 | (:export #:prototype 11 | #:get-prototype 12 | #:check-args 13 | #:parse-sink-attribute 14 | #:parse-sink-attribute-value 15 | #:parse-action 16 | #:collect-artefact 17 | #:configuration-class 18 | 19 | #:configuration 20 | #:make-configuration 21 | #:register-configuration 22 | #:resolve-configuration 23 | 24 | #:sink 25 | #:add-artefact 26 | #:sink-configuration 27 | #:sink-document 28 | #:sink-artefacts 29 | #:group-artefacts 30 | #:attribute-value 31 | #:attribute-value-in-context 32 | #:make-attribute-value-composer 33 | #:finish-sink 34 | #:run-document-actions 35 | #:run-item-actions 36 | 37 | #:attribute-problem 38 | #:sink-problem 39 | #:attribute-error 40 | #:configuration-error 41 | #:invalid-attribute-type 42 | #:missing-attribute-value 43 | #:too-many-attribute-values)) 44 | 45 | (in-package #:pastelyzer.config.sink) 46 | 47 | (defgeneric parse-sink-attribute (prototype attribute &rest args)) 48 | 49 | (defgeneric parse-action (prototype scope action &rest body) 50 | (:documentation "Helper function to parse document and item actions.")) 51 | 52 | (defgeneric attribute-value (sink attribute)) 53 | 54 | (define-condition configuration-problem () 55 | ()) 56 | 57 | (define-condition configuration-error (configuration-problem error) 58 | ()) 59 | 60 | (define-condition file-does-not-exist (configuration-problem) 61 | ((path 62 | :initarg :path 63 | :reader path-of)) 64 | (:report (lambda (condition stream) 65 | (format stream "File does not exist: ~A" (path-of condition))))) 66 | 67 | (define-condition name-problem (configuration-problem) 68 | ((name 69 | :initarg :name 70 | :reader name-of))) 71 | 72 | (define-condition unknown-sink-problem (name-problem) 73 | () 74 | (:report (lambda (condition stream) 75 | (format stream "Unknown sink: ~S" (name-of condition))))) 76 | 77 | (define-condition unknown-sink-error (unknown-sink-problem configuration-error) 78 | ()) 79 | 80 | (define-condition unknown-sink-warning (unknown-sink-problem warning) 81 | ()) 82 | 83 | (define-condition sink-problem (configuration-problem) 84 | ((sink 85 | :initarg :sink 86 | :reader sink-of))) 87 | 88 | (define-condition attribute-problem (sink-problem) 89 | ((attribute 90 | :initarg :attribute 91 | :reader attribute-of))) 92 | 93 | (define-condition attribute-error (attribute-problem configuration-error) 94 | ()) 95 | 96 | (define-condition invalid-attribute-used (attribute-error) 97 | () 98 | (:report (lambda (condition stream) 99 | (format stream "Attempt to use misconfigured attribute ~S of ~A" 100 | (attribute-of condition) 101 | (name-of (sink-of condition)))))) 102 | 103 | (define-condition too-many-attribute-values (attribute-error) 104 | () 105 | (:report (lambda (condition stream) 106 | (format stream "Too many values provided for attribute ~S of ~A" 107 | (attribute-of condition) 108 | (name-of (sink-of condition)))))) 109 | 110 | (define-condition invalid-attribute-type (attribute-error) 111 | ((typespec 112 | :initarg :typespec 113 | :reader typespec-of) 114 | (value 115 | :initarg :value 116 | :reader value-of)) 117 | (:report (lambda (condition stream) 118 | (format stream "~S is not of type ~S (~S attribute of ~A)" 119 | (value-of condition) 120 | (typespec-of condition) 121 | (attribute-of condition) 122 | (name-of (sink-of condition)))))) 123 | 124 | (define-condition missing-attribute-value (attribute-error) 125 | () 126 | (:report (lambda (condition stream) 127 | (format stream "Missing value for attribute ~S of ~A" 128 | (attribute-of condition) 129 | (name-of (sink-of condition)))))) 130 | 131 | (define-condition unknown-attribute (attribute-error) 132 | () 133 | (:report (lambda (condition stream) 134 | (format stream "Unknown ~S attribute: ~S" 135 | (name-of (sink-of condition)) 136 | (attribute-of condition))))) 137 | 138 | (defun resolve-user-value (form) 139 | (etypecase form 140 | ((member usr:yes usr:true) t) 141 | ((member usr:no usr:false) nil) 142 | ((cons (eql usr:env) (cons string)) 143 | (destructuring-bind (name) 144 | (rest form) 145 | (or (uiop:getenv name) 146 | (error "Environment variable ~A missing" name)))) 147 | ((cons (eql usr:file-contents) (cons string)) 148 | (destructuring-bind (path) 149 | (rest form) 150 | (if (probe-file path) 151 | (alexandria:read-file-into-string path) 152 | (signal 'file-does-not-exist :path path)))) 153 | ((cons (eql usr:or)) 154 | (some #'resolve-user-value (rest form))) 155 | (keyword form) 156 | (string form))) 157 | 158 | (defun check-args (sink attribute args specs) 159 | (labels ((check-arg (value &key type transform) 160 | (setq value (resolve-user-value value)) 161 | (when (and type (not (typep value type))) 162 | (error 'invalid-attribute-type 163 | :sink sink 164 | :attribute attribute 165 | :typespec type 166 | :value value)) 167 | (if transform 168 | (funcall transform value) 169 | value))) 170 | (let ((result '())) 171 | (loop 172 | (when (endp specs) 173 | (if args 174 | (error 'too-many-attribute-values 175 | :sink sink 176 | :attribute attribute) 177 | (return (nreverse result)))) 178 | (let ((spec (pop specs))) 179 | (when (eq '&rest spec) 180 | (setq spec (pop specs)) 181 | (assert (not (null spec))) 182 | (assert (endp specs)) 183 | (return 184 | (nconc (nreverse result) 185 | (mapcar (lambda (value) 186 | (apply #'check-arg value spec)) 187 | args)))) 188 | (unless args 189 | (error 'missing-sink-attribute-value 190 | :sink sink 191 | :attribute attribute)) 192 | (push (apply #'check-arg (pop args) spec) result)))))) 193 | 194 | (defclass sink () 195 | ((configuration 196 | :initarg :configuration 197 | :reader sink-configuration) 198 | (document 199 | :initarg :document 200 | :reader sink-document) 201 | (artefacts 202 | :initarg :artefacts 203 | :reader sink-artefacts 204 | :initform '()) 205 | (groups)) 206 | (:documentation 207 | "Subclasses of this class are instantiated for each processed 208 | document (on-demand) to collect relevant artefacts.")) 209 | 210 | (defmethod attribute-value :around ((ctx t) (attribute t)) 211 | ;; Fix the origin of unknown-attribute error. 212 | (handler-case (call-next-method) 213 | (unknown-attribute () 214 | (error 'unknown-attribute 215 | :sink ctx 216 | :attribute attribute)))) 217 | 218 | (defmethod attribute-value ((sink sink) (attribute t)) 219 | (attribute-value (sink-configuration sink) attribute)) 220 | 221 | (defmethod add-artefact ((sink sink) (artefact t)) 222 | (push artefact (slot-value sink 'artefacts))) 223 | 224 | (defun group-artefacts (sink) 225 | (if (slot-boundp sink 'groups) 226 | (slot-value sink 'groups) 227 | (setf (slot-value sink 'groups) 228 | (pastelyzer::group-artefacts (sink-artefacts sink))))) 229 | 230 | (defclass prototype () 231 | ()) 232 | 233 | (defmethod name-of ((object prototype)) 234 | (class-name (class-of object))) 235 | 236 | (defclass configuration () 237 | ((name 238 | :initarg :name 239 | :reader name-of 240 | :type symbol) 241 | (parent 242 | :initarg :parent 243 | :reader parent-of 244 | :type (or configuration prototype)) 245 | (attributes 246 | :initarg :attributes 247 | :reader attributes-of 248 | :type list 249 | :initform '()) 250 | (document-actions 251 | :initarg :document-actions 252 | :reader document-actions 253 | :type list 254 | :initform '()) 255 | (item-actions 256 | :initarg :item-actions 257 | :reader item-actions 258 | :type list 259 | :initform '()))) 260 | 261 | (defmethod print-object ((object configuration) (stream t)) 262 | (print-unreadable-object (object stream :type t :identity t) 263 | (format stream "~A" (slot-value object 'name)))) 264 | 265 | (defmethod initialize-instance :around 266 | ((instance configuration) &rest initargs &key attributes &allow-other-keys) 267 | ;; We might get rid of this method if we make PARSE-SINK-ATTRIBUTE 268 | ;; into PROCESS-CONFIGURATION-OPTION that would act directly on a 269 | ;; CONFIGURATION instance. 270 | (let* ((document-actions '()) 271 | (item-actions '()) 272 | (attrs (remove-if (lambda (attr) 273 | (case (first attr) 274 | (:document-action 275 | (push (second attr) document-actions) 276 | t) 277 | (:item-action 278 | (push (second attr) item-actions) 279 | t))) 280 | attributes))) 281 | (apply #'call-next-method instance 282 | :document-actions (nreverse document-actions) 283 | :item-actions (nreverse item-actions) 284 | :attributes attrs 285 | initargs))) 286 | 287 | (defmethod configuration-class ((proto prototype)) 288 | 'configuration) 289 | 290 | (defmethod attribute-value ((cfg configuration) (attribute t)) 291 | (let ((spec (find attribute (attributes-of cfg) :key #'car))) 292 | (if spec 293 | (second spec) 294 | (attribute-value (parent-of cfg) attribute)))) 295 | 296 | ;;; XXX: Consider using symbol plist for this. 297 | (defvar *known-configurations* nil 298 | "An association list of known sink configurations.") 299 | 300 | (defun register-configuration (sink &aux (name (name-of sink))) 301 | (let ((cons (assoc name *known-configurations*))) 302 | (cond (cons 303 | (warn "Replacing existing sink configuraton ~A" name) 304 | (setf (cdr cons) sink)) 305 | (t 306 | (setq *known-configurations* 307 | (acons name sink *known-configurations*)))))) 308 | 309 | (defun resolve-configuration (name) 310 | (let ((cons (assoc name *known-configurations*))) 311 | (if cons 312 | (cdr cons) 313 | (get-prototype name)))) 314 | 315 | (defmethod parse-sink-attribute :around 316 | ((proto prototype) (attribute t) &rest args) 317 | (declare (ignore args)) 318 | (with-simple-restart 319 | (continue "Leave the attribute unconfigured (~S)." attribute) 320 | (return-from parse-sink-attribute (call-next-method))) 321 | (cons attribute (lambda (context) 322 | (error 'invalid-attribute-used 323 | :sink context 324 | :attribute attribute)))) 325 | 326 | (defun make-configuration (name parent attributes) 327 | (let* ((proto (get-prototype parent)) 328 | (attributes 329 | (mapcar (lambda (spec) 330 | (apply #'parse-sink-attribute proto spec)) 331 | attributes))) 332 | (make-instance (configuration-class proto) 333 | :name name 334 | :parent (resolve-configuration parent) 335 | :attributes attributes))) 336 | 337 | (defun attribute-value-in-context (cfg attribute context) 338 | (flet ((resolve (value) 339 | (if (functionp value) 340 | (funcall value context) 341 | value))) 342 | (let ((value (attribute-value cfg attribute))) 343 | (if (consp value) 344 | (mapcar #'resolve value) 345 | (resolve value))))) 346 | 347 | (defmethod attribute-value ((cfg prototype) (attribute t)) 348 | (error 'unknown-attribute 349 | :sink cfg 350 | :attribute attribute)) 351 | 352 | (defmethod attribute-value ((cfg null) (attribute t)) 353 | (error "Programming is hard")) 354 | 355 | ;;; XXX FIXME: Prevent recursive definitions. 356 | (defmethod get-prototype ((instance configuration)) 357 | (get-prototype (parent-of instance))) 358 | 359 | (defmethod get-prototype ((instance sink)) 360 | (get-prototype (sink-configuration instance))) 361 | 362 | (defmethod get-prototype ((instance prototype)) 363 | instance) 364 | 365 | (defmethod get-prototype ((name symbol)) 366 | (let ((cons (assoc name *known-configurations*))) 367 | (if cons 368 | (get-prototype (cdr cons)) 369 | (error 'unknown-sink-error :name name)))) 370 | 371 | (defmethod parse-sink-attribute ((impl prototype) (attribute symbol) 372 | &rest args) 373 | (declare (ignore args)) 374 | (error 'unknown-attribute 375 | :sink impl 376 | :attribute attribute)) 377 | 378 | (defmethod parse-sink-attribute ((impl prototype) (attribute (eql :deduplicate)) 379 | &rest args) 380 | (list* attribute (check-args impl attribute args '((:type boolean))))) 381 | 382 | (defmethod parse-sink-attribute 383 | ((impl prototype) (attribute (eql :document-action)) &rest args) 384 | (check-type args (cons cons null)) 385 | (destructuring-bind ((action &rest body)) 386 | args 387 | (assert (user-identifier-p action)) 388 | (list attribute (apply #'parse-action impl :document action body)))) 389 | 390 | (defmethod parse-sink-attribute 391 | ((impl prototype) (attribute (eql :item-action)) &rest args) 392 | (check-type args (cons cons null)) 393 | (destructuring-bind ((action &rest body)) 394 | args 395 | (assert (user-identifier-p action)) 396 | (list attribute (apply #'parse-action impl :item action body)))) 397 | 398 | (defgeneric finish-sink (proto sink)) 399 | 400 | (defgeneric run-document-actions (prototype sink &key &allow-other-keys)) 401 | (defgeneric run-item-actions (prototype sink &key &allow-other-keys)) 402 | 403 | ;;; We might want to collect document and item actions across 404 | ;;; configuration inheritance tree. But then again there should be a 405 | ;;; way to disable/override some actions from parent configuration, 406 | ;;; which means the actions should be named. Currently only actions 407 | ;;; from sink configuration are used. 408 | (defmethod run-document-actions 409 | ((prototype prototype) (sink sink) &rest args) 410 | (loop with cfg = (sink-configuration sink) 411 | for action in (document-actions cfg) 412 | do (apply action (sink-document sink) args))) 413 | 414 | (defmethod run-item-actions 415 | ((prototype prototype) (sink sink) &rest args) 416 | (let* ((cfg (sink-configuration sink)) 417 | (actions (item-actions cfg))) 418 | (loop for artefact in (sink-artefacts sink) 419 | do (loop for action in actions 420 | do (apply action artefact args))))) 421 | -------------------------------------------------------------------------------- /src/config/smtp.lisp: -------------------------------------------------------------------------------- 1 | (defpackage #:pastelyzer.config.smtp 2 | (:use #:common-lisp) 3 | (:import-from #:pastelyzer.log 4 | #:msg) 5 | (:import-from #:pastelyzer.config.sink 6 | #:prototype 7 | #:get-prototype 8 | #:check-args 9 | #:parse-sink-attribute 10 | #:parse-sink-attribute-value 11 | #:sink 12 | #:sink-document 13 | #:sink-configuration 14 | #:group-artefacts 15 | #:attribute-value 16 | #:attribute-value-in-context 17 | #:finish-sink 18 | #:invalid-attribute-type) 19 | (:local-nicknames (#:util #:pastelyzer.config.util)) 20 | (:import-from #:pastelyzer.config.package 21 | #:user-identifier)) 22 | 23 | (in-package #:pastelyzer.config.smtp) 24 | 25 | (defclass smtp (prototype) 26 | ()) 27 | 28 | (defvar *sink-prototype* nil) 29 | 30 | (defmethod get-prototype ((name (eql (user-identifier "SMTP-SINK")))) 31 | (or *sink-prototype* 32 | (setq *sink-prototype* (make-instance 'smtp)))) 33 | 34 | (defmethod parse-sink-attribute ((impl smtp) (attribute symbol) 35 | &rest args) 36 | (cond ((member attribute '(:server :from)) 37 | (list* attribute (check-args impl attribute args '((:type string))))) 38 | (t 39 | (call-next-method)))) 40 | 41 | (defmethod parse-sink-attribute 42 | ((impl smtp) (attribute (eql :subject)) &rest args) 43 | (list attribute (util:parse-user-template args))) 44 | 45 | (defmethod parse-sink-attribute ((impl smtp) (attribute (eql :body)) 46 | &rest args) 47 | (list attribute (util:parse-user-template args))) 48 | 49 | (defmethod parse-sink-attribute ((impl smtp) (attribute (eql :recipients)) 50 | &rest args) 51 | (list attribute (check-args impl attribute args '(&rest (:type string))))) 52 | 53 | (defmethod finish-sink ((proto smtp) (sink sink)) 54 | ;; TODO: The presence of these fields should be checked after 55 | ;; loading the configuration. 56 | (flet ((ensure-value (name) 57 | (let* ((document (sink-document sink)) 58 | (value (attribute-value-in-context sink name document))) 59 | (cond ((plusp (length value)) 60 | value) 61 | (t 62 | (msg :error "Missing value for field ~S in ~S for ~S" 63 | name 64 | (sink-configuration sink) 65 | document) 66 | (return-from finish-sink nil)))))) 67 | (cl-smtp:send-email 68 | (ensure-value :server) 69 | (ensure-value :from) 70 | (ensure-value :recipients) 71 | (ensure-value :subject) 72 | (ensure-value :body)))) 73 | -------------------------------------------------------------------------------- /src/config/util.lisp: -------------------------------------------------------------------------------- 1 | (defpackage #:pastelyzer.config.util 2 | (:use #:common-lisp) 3 | (:import-from #:alexandria 4 | #:when-let) 5 | (:local-nicknames (#:sink #:pastelyzer.config.sink) 6 | (#:filter #:pastelyzer.config.filter) 7 | (#:usr #:pastelyzer.config.user)) 8 | (:export #:parse-item-function 9 | #:parse-dynamic-attribute 10 | #:make-attribute-value-composer 11 | #:parse-user-template)) 12 | 13 | (in-package #:pastelyzer.config.util) 14 | 15 | (defun parse-item-function (code &optional (name '#:anonymous)) 16 | ;; XXX: Filters were not exactly designed for this... 17 | (pastelyzer.config.filter::parse-filter name code)) 18 | 19 | (defun make-formatter (control-string &rest args) 20 | (let ((values (mapcar #'parse-item-function args))) 21 | (lambda (context) 22 | (apply #'format nil control-string 23 | (mapcar (lambda (fn) (funcall fn context)) values))))) 24 | 25 | (defun parse-dynamic-attribute (attr &optional name) 26 | (typecase attr 27 | (cons 28 | (case (car attr) 29 | (usr:fmt 30 | (apply #'make-formatter (rest attr))) 31 | (t 32 | (parse-item-function attr)))) 33 | ((or string keyword) 34 | attr) 35 | (null 36 | nil) 37 | (t 38 | (error "Invalid value~@[ for attribute ~S~]: ~S" name attr)))) 39 | 40 | (defun make-attribute-value-composer (values) 41 | (lambda (context) 42 | (with-output-to-string (out) 43 | (dolist (value values) 44 | (cond ((eql :nl value) 45 | (terpri out)) 46 | ((eql :fl value) 47 | (fresh-line out)) 48 | ((functionp value) 49 | (when-let (datum (funcall value context)) 50 | (princ datum out))) 51 | (t 52 | (princ value out))))))) 53 | 54 | (defun parse-user-template (args) 55 | (make-attribute-value-composer (mapcar #'parse-dynamic-attribute args))) 56 | -------------------------------------------------------------------------------- /src/fmt.lisp: -------------------------------------------------------------------------------- 1 | (in-package #:fmt) 2 | 3 | (defun nbytes (stream n &optional colon-p at-sign-p) 4 | "Formats amount of N bytes in a human-readable fashion using powers 5 | of 1024, or powers of 1000 if COLON-P is true." 6 | (declare (ignore at-sign-p) 7 | (type unsigned-byte n)) 8 | (cond ((zerop n) 9 | (write-string "0B" stream)) 10 | (t 11 | (multiple-value-bind (base units) 12 | (if colon-p 13 | (values 1000.0d0 "BkMGTPEZY") 14 | (values 1024.0d0 "BKMGTPEZY")) 15 | (loop for i fixnum from 0 below (1- (length units)) 16 | for f double-float = (coerce n 'double-float) then (/ f base) 17 | until (< f base) 18 | finally (let ((unit (schar units i))) 19 | (if (and (< f 10) (plusp i)) 20 | (format stream "~,1F~A" f unit) 21 | (format stream "~D~A" (round f) unit)))))))) 22 | 23 | (defun bytes (stream bytes &optional colon-p at-sign-p) 24 | "Formats a sequence of BYTES as hex-digit pairs." 25 | (declare (ignore colon-p at-sign-p)) 26 | (etypecase bytes 27 | (vector 28 | (loop for byte of-type (unsigned-byte 8) across bytes 29 | do (when (< byte #x10) 30 | (write-char #\0 stream)) 31 | (write byte :stream stream 32 | :base 16 33 | :radix nil 34 | :readably nil 35 | :escape nil))) 36 | (list 37 | (format stream "~{~2,'0x~}" bytes)))) 38 | -------------------------------------------------------------------------------- /src/job.lisp: -------------------------------------------------------------------------------- 1 | (in-package #:pastelyzer) 2 | 3 | (defgeneric job-subject (job)) 4 | 5 | (defgeneric register-artefact (job artefact source) 6 | (:documentation 7 | "Called whenever an ARTEFACT is exctracted (from SOURCE).")) 8 | 9 | (defgeneric finish-job (job)) 10 | 11 | (defgeneric resolve-domains-p (job) 12 | (:documentation 13 | "Whether to resolve discovered domains.")) 14 | 15 | (defclass job () 16 | ((subject 17 | :initarg :subject 18 | :reader job-subject 19 | :documentation 20 | "The object that triggered this job.")) 21 | (:documentation 22 | "For each item to be processed an instance of this class is 23 | created. 24 | 25 | Controls what extractors are being used. For instance when testing 26 | none of the extractors that connect to external systems are 27 | active.")) 28 | 29 | (defmethod print-object ((job job) (stream t)) 30 | (print-unreadable-object (job stream :type t :identity t) 31 | (princ (job-subject job) stream))) 32 | 33 | (defmethod register-artefact ((job job) (artefact t) (source t)) 34 | artefact) 35 | 36 | (defmethod finish-job ((job job)) 37 | ;; Do nothing by default. 38 | nil) 39 | 40 | (defmethod resolve-domains-p ((job job)) 41 | *resolve-domains*) 42 | -------------------------------------------------------------------------------- /src/json-api.lisp: -------------------------------------------------------------------------------- 1 | (in-package #:pastelyzer) 2 | 3 | (defun provider-urls (provider id) 4 | (cond ((string= "circl" provider) 5 | (mapcar #'remote-content-location 6 | (multiple-value-list (circl-provider-id-to-url id)))) 7 | ((string= "web" provider) 8 | (list id)) 9 | (t 10 | nil))) 11 | 12 | (defmethod content-uri ((content content)) 13 | (content-uri (content-id content))) 14 | 15 | (defmethod content-uri ((content integer)) 16 | (puri:merge-uris (format nil "/content/~D/" content) 17 | *web-server-external-uri*)) 18 | 19 | (defmethod paste-uri ((paste paste)) 20 | (paste-uri (paste-id paste))) 21 | 22 | (defmethod paste-uri ((paste integer)) 23 | (puri:merge-uris (format nil "/paste/~D/" paste) 24 | *web-server-external-uri*)) 25 | 26 | (defun artefact-to-jsown (content-id type value extra important note) 27 | `(:obj ("type" . ,type) 28 | ("value" . ,value) 29 | ,@(when important 30 | `(("important" . :true))) 31 | ,@(unless (eq :null note) 32 | `(("note" . ,note))) 33 | ,@(when (and extra (not (eq :null extra))) 34 | `(("extra" . ,extra))) 35 | ("source" . ,(princ-to-string (content-uri content-id))))) 36 | 37 | (defun artefact-to-jsown* (content-id type value extra important note) 38 | (let ((fields (string-case (type) 39 | ("BANK-CARD-NUMBER" 40 | `(("type" . "cc-number") 41 | ("digits" . ,value))) 42 | ("DOMAIN" 43 | `(("type" . "domain") 44 | ("domain" . ,value))) 45 | ("ONION" 46 | `(("type" . "onion") 47 | ("address" . ,value))) 48 | ("EMAIL" 49 | `(("type" . "email") 50 | ("email" . ,value))) 51 | ("CREDENTIAL" 52 | `(("type" . "credential") 53 | ("username" . ,value) 54 | ("passphrase" . ,extra))) 55 | ("IP-ADDRESS" 56 | `(("type" . "ip") 57 | ("address" . ,value))) 58 | ("IP-SERVICE" 59 | `(("type" . "service") 60 | ("address" . ,value) 61 | ("port" . ,(parse-integer extra)))) 62 | ("RESOLVED-IP-ADDRESS" 63 | `(("type" . "ip") 64 | ("address" . ,value) 65 | ("domain" . ,extra))) 66 | ("BASE64-BLOB" 67 | `(("type" . "blob") 68 | ("sha1" . ,value) 69 | ("encoding" . "base64") 70 | ("start" . ,extra))) 71 | ("HEX-BLOB" 72 | `(("type" . "blob") 73 | ("sha1" . ,value) 74 | ("encoding" . "hex") 75 | ("start" . ,extra))) 76 | ("BINARY-BLOB" 77 | `(("type" . "blob") 78 | ("sha1" . ,value) 79 | ("encoding" . "binary") 80 | ("start" . ,extra))) 81 | (t 82 | `(("type" . ,type) 83 | ("value" . ,value) 84 | ,@(when (and extra (not (eq :null extra))) 85 | `(("extra" . ,extra)))))))) 86 | `(:obj ,@fields 87 | ,@(when important 88 | `(("important" . :true))) 89 | ,@(unless (eq :null note) 90 | `(("note" . ,note))) 91 | ("source" . ,(princ-to-string (content-uri content-id)))))) 92 | 93 | (defun paste-to-jsown (paste-id provider provider-id timestamp content-id) 94 | `(:obj ("type" . "paste") 95 | ("id" . ,paste-id) 96 | ("location" . ,(princ-to-string (paste-uri paste-id))) 97 | ("provider" . ,provider) 98 | ("provider-id" . ,provider-id) 99 | ("timestamp" . ,(princ-to-string timestamp)) 100 | ("urls" . ,(mapcar #'princ-to-string 101 | (provider-urls provider provider-id))) 102 | ("content" . ,(princ-to-string (content-uri content-id))))) 103 | 104 | (defun parse-limit (value) 105 | (cond ((null value) 106 | 500) 107 | ((string= "" value) 108 | nil) 109 | ((every #'digit-char-p value) 110 | (parse-integer value)) 111 | (t 112 | :invalid))) 113 | 114 | (define-handler filter-artefacts :post "/artefacts" 115 | (let* ((value (ht:parameter "value")) 116 | (extra (ht:parameter "extra")) 117 | (limit (ht:parameter "limit")) 118 | (parsed-limit (parse-limit limit))) 119 | (cond ((eq :invalid parsed-limit) 120 | (msg :notice "~A -> /artefacts: invalid limit value: ~S" 121 | (ht:remote-addr*) limit) 122 | (values nil ht:+http-bad-request+)) 123 | ((or value extra) 124 | (let ((artefacts 125 | (db:with-connection () 126 | (db:search-artefacts :value value 127 | :extra extra 128 | :limit limit)))) 129 | (setf (ht:header-out :content-type) "application/json") 130 | (jsown:to-json 131 | (loop for artefact in artefacts 132 | collect (apply #'artefact-to-jsown artefact))))) 133 | (t 134 | (msg :notice "~A -> /artefacts: missing query parameters" 135 | (ht:remote-addr*)) 136 | (values nil ht:+http-bad-request+))))) 137 | 138 | (define-handler content-artefacts :get 139 | ("^/content/(\\d+)/artefacts(/typed)?$" content-id typed) 140 | (let ((artefacts 141 | (db:with-connection () 142 | (db:content-artefacts content-id)))) 143 | ;; XXX: What if the content does not exist? Should we respond 144 | ;; with not-found instead of an empty set? 145 | (setf (ht:header-out :content-type) "application/json") 146 | (jsown:to-json 147 | (loop for artefact in artefacts 148 | collect (apply (if typed #'artefact-to-jsown* #'artefact-to-jsown) 149 | artefact))))) 150 | 151 | (define-handler content-meta :get 152 | ("^/content/(\\d+)/?$" (#'parse-integer content-id)) 153 | (let ((sources 154 | (db:with-connection () 155 | (db:content-sources content-id))) 156 | (content-uri (content-uri content-id))) 157 | ;; XXX: This should definitely respond with not-found if the 158 | ;; content does not exist? 159 | (setf (ht:header-out :content-type) "application/json") 160 | (jsown:to-json 161 | `(:obj 162 | ("type" . "content") 163 | ("id" . ,content-id) 164 | ("location" . ,(princ-to-string content-uri)) 165 | ("body" . ,(princ-to-string (puri:merge-uris "body" content-uri))) 166 | ("sources" . ,(loop for paste in sources 167 | collect (apply #'paste-to-jsown paste))))))) 168 | 169 | (define-handler paste-meta :get 170 | ("^/paste/(\\d+)/?$" (#'parse-integer id)) 171 | (let ((row 172 | (db:with-connection () 173 | (db:get-paste id)))) 174 | (cond (row 175 | (setf (ht:header-out :content-type) "application/json") 176 | (jsown:to-json (apply #'paste-to-jsown row))) 177 | (t 178 | (values nil ht:+http-not-found+))))) 179 | 180 | (define-handler filter-artefacts/typed :post "/artefacts/typed" 181 | (let ((parameters '()) 182 | (options '(:limit 500))) 183 | (loop for (name . value) in (ht:post-parameters*) 184 | do (string-case (name) 185 | ("limit" 186 | (let ((parsed (parse-limit value))) 187 | (cond ((eq :invalid parsed) 188 | (msg :notice 189 | "~A -> /artefacts: invalid limit value: ~S" 190 | (ht:remote-addr*) value) 191 | (return-from filter-artefacts/typed 192 | (values nil ht:+http-bad-request+))) 193 | (t 194 | (setq options (list* :limit parsed options)))))) 195 | ("ip" 196 | (push (list :types '("IP-ADDRESS" 197 | "IP-SERVICE" 198 | "RESOLVED-IP-ADDRESS") 199 | :value value) 200 | parameters)) 201 | ("domain" 202 | (push (list :type "DOMAIN" 203 | :value value) 204 | parameters)) 205 | ("email" 206 | ;; XXX: For now all our usernames in credentials are 207 | ;; emails. 208 | (push (list :type "EMAIL" 209 | :value value) 210 | parameters)) 211 | ("credential" 212 | (push (list :type "CREDENTIAL" 213 | :value value) 214 | parameters)) 215 | ("cc-number" 216 | (push (list :type "BANK-CARD-NUMBER" 217 | :value value) 218 | parameters)) 219 | ("onion" 220 | (push (list :type "ONION" 221 | :value value) 222 | parameters)) 223 | ("sha1" 224 | (push (list :types '("BASE64-BLOB" "HEX-BLOB" "BINARY-BLOB") 225 | :value value) 226 | parameters)) 227 | ("any" 228 | (push (list :value value) 229 | parameters)) 230 | (t 231 | (msg :notice "~A -> /artefacts/typed: unknown parameter: ~A=~A" 232 | (ht:remote-addr*) name value) 233 | (return-from filter-artefacts/typed 234 | (values nil ht:+http-bad-request+))))) 235 | (let ((artefacts 236 | (db:with-connection () 237 | (apply #'db:search-artefacts-multi parameters options)))) 238 | (setf (ht:header-out :content-type) "application/json") 239 | (jsown:to-json 240 | (loop for artefact in artefacts 241 | collect (apply #'artefact-to-jsown* artefact)))))) 242 | -------------------------------------------------------------------------------- /src/log.lisp: -------------------------------------------------------------------------------- 1 | (in-package #:pastelyzer.log) 2 | 3 | (defvar *categories* (make-instance 'cl-log:category-set)) 4 | 5 | (macrolet ((defcategory (category &optional expands-as) 6 | `(cl-log:defcategory ,category ,expands-as *categories*))) 7 | (defcategory :stat) 8 | (defcategory :fixme) 9 | (defcategory :critical) 10 | (defcategory :error (or :error :critical)) 11 | (defcategory :warning (or :warning :error)) 12 | (defcategory :notice (or :notice :warning)) 13 | (defcategory :info (or :info :notice :stat)) 14 | (defcategory :debug (or :debug :info :http :disc)) 15 | (defcategory :http) 16 | (defcategory :disc)) 17 | 18 | (defclass timestamped-message (cl-log:formatted-message) 19 | ()) 20 | 21 | (defmethod cl-log:format-message ((message timestamped-message)) 22 | (let ((timestamp (cl-log:message-timestamp message)) 23 | (*print-pretty* nil)) 24 | (multiple-value-bind (ss mm hh d m y) 25 | (decode-universal-time (cl-log:timestamp-universal-time timestamp)) 26 | (format nil "~D-~2,'0D-~2,'0DT~2,'0D:~2,'0D:~2,'0D.~6,'0D ~A ~?~&" 27 | y m d hh mm ss 28 | (cl-log:timestamp-fraction timestamp) 29 | (cl-log:message-category message) 30 | (cl-log:message-description message) 31 | (cl-log:message-arguments message))))) 32 | 33 | (defclass plain-message (cl-log:formatted-message) 34 | ()) 35 | 36 | (defmethod cl-log:format-message ((message plain-message)) 37 | (let ((*print-pretty* nil)) 38 | (format nil "~A ~?~&" 39 | (cl-log:message-category message) 40 | (cl-log:message-description message) 41 | (cl-log:message-arguments message)))) 42 | 43 | (defvar *log-manager* 44 | (make-instance 'cl-log:log-manager 45 | :message-class 'plain-message 46 | :categories *categories*)) 47 | 48 | (defun known-log-category-p (keyword) 49 | (let ((table (cl-log::category-set-categories *categories*))) 50 | (nth-value 1 (gethash keyword table)))) 51 | 52 | (defmacro msg (category description &rest args) 53 | (if (endp args) 54 | `(cl-log:log-manager-message *log-manager* ,category "~A" ,description) 55 | `(cl-log:log-manager-message *log-manager* ,category ,description ,@args))) 56 | 57 | (defun maybe-log (category description args) 58 | "Functional version of MSG." 59 | (when-let ((messengers 60 | (cl-log::category-messengers category :manager *log-manager*))) 61 | (cl-log::send-message *log-manager* messengers category description args))) 62 | 63 | (defun setup-logging (&key (filter :info) 64 | (message-class 'plain-message)) 65 | (when (and *log-manager* 66 | (not (eq message-class 67 | (cl-log:log-manager-message-class *log-manager*)))) 68 | (setf (cl-log:log-manager-message-class *log-manager*) message-class) 69 | (cl-log:invalidate-log-manager *log-manager*)) 70 | 71 | (cl-log:start-messenger 'cl-log:text-stream-messenger 72 | :name 'default 73 | :manager *log-manager* 74 | :stream (make-broadcast-stream *standard-output*) 75 | :filter filter)) 76 | 77 | (defmacro with-logged-warnings (&body body) 78 | `(handler-bind 79 | ((warning #'(lambda (condition) 80 | (msg :warning "~A" condition) 81 | (when-let ((restart (find-restart 'muffle-warning))) 82 | (invoke-restart restart))))) 83 | ,@body)) 84 | -------------------------------------------------------------------------------- /src/modules/misp.lisp: -------------------------------------------------------------------------------- 1 | (defpackage #:pastelyzer.modules.misp 2 | (:use #:common-lisp 3 | #:pastelyzer) 4 | (:import-from #:pastelyzer.log 5 | #:msg) 6 | (:import-from #:alexandria 7 | #:when-let) 8 | (:import-from #:pastelyzer.rest 9 | #:jsown-call 10 | #:with-keepalive) 11 | (:import-from #:pastelyzer.config.package 12 | #:user-identifier) 13 | (:local-nicknames (#:sink #:pastelyzer.config.sink) 14 | (#:filter #:pastelyzer.config.filter) 15 | (#:util #:pastelyzer.config.util) 16 | (#:cfg #:pastelyzer.config.package) 17 | (#:usr #:pastelyzer.config.user))) 18 | 19 | (in-package #:pastelyzer.modules.misp) 20 | 21 | (defclass misp-server (sink:configuration) 22 | ((ssl-headers 23 | :initarg :ssl-headers 24 | :reader misp-server-ssl-headers 25 | :type list))) 26 | 27 | (defmethod slot-unbound 28 | ((class t) (instance misp-server) (name (eql 'ssl-headers))) 29 | (setf (slot-value instance 'ssl-headers) 30 | (loop for (attr . key) in '((:ca-cert . :ca-file) 31 | (:user-cert . :certificate) 32 | (:user-key-pass . :certificate-password) 33 | (:user-key . :key)) 34 | for value = (sink:attribute-value instance attr) 35 | when value 36 | collect key and collect value))) 37 | 38 | (defun api-call (endpoint path method &optional content) 39 | (let ((uri (sink:attribute-value endpoint :server)) 40 | (api-key (sink:attribute-value endpoint :api-key)) 41 | (ssl-headers (misp-server-ssl-headers endpoint))) 42 | (apply #'jsown-call uri method path 43 | :content content 44 | :additional-headers `(("Authorization" . ,api-key)) 45 | ssl-headers))) 46 | 47 | (defun find-sharing-group-id (misp name) 48 | (loop for json in (jsown:filter (api-call misp "sharing_groups/index" :get) 49 | "response" map "SharingGroup") 50 | when (string= (jsown:val json "name") name) 51 | do (return (jsown:val json "id")))) 52 | 53 | (defun add-event (misp info &key (distribution "1") 54 | (sharing-group nil) 55 | date 56 | (analysis "2") 57 | (threat-level "4")) 58 | (let ((content 59 | `(:obj 60 | ("analysis" . ,analysis) 61 | ("threat_level_id" . ,threat-level) 62 | ("info" . ,info) 63 | ,@(if sharing-group 64 | `(("distribution" . "4") 65 | ("sharing_group_id" . ,sharing-group)) 66 | `(("distribution" . ,distribution))) 67 | ,@(when date 68 | `(("date" . ,date)))))) 69 | (let* ((res (api-call misp "events" :post content)) 70 | (id (jsown:filter res "Event" "id")) 71 | (uuid (jsown:filter res "Event" "uuid"))) 72 | (msg :info "Created MISP event ~A (~A)" id uuid) 73 | (values id uuid)))) 74 | 75 | (defun publish-event (misp id &key (alert nil)) 76 | (api-call misp (format nil "events/~:[publish~;alert~]/~A" alert id) :post)) 77 | 78 | (defun add-event-attribute (misp event-id category type value &optional comment) 79 | (api-call misp (format nil "attributes/add/~A" event-id) 80 | :post 81 | `(:obj 82 | ("category" . ,category) 83 | ("type" . ,type) 84 | ("value" . ,value) 85 | ("comment" . ,comment)))) 86 | 87 | (defun add-tag (misp uuid tag) 88 | (api-call misp "tags/attachTagToObject" 89 | :post 90 | `(:obj 91 | ("uuid" . ,uuid) 92 | ("tag" . ,tag)))) 93 | 94 | (defvar *sink-prototype* nil) 95 | 96 | (defclass proto-misp (sink:prototype) 97 | ()) 98 | 99 | (defmethod sink:get-prototype 100 | ((name (eql (user-identifier "MISP-SINK")))) 101 | (or *sink-prototype* 102 | (setq *sink-prototype* (make-instance 'proto-misp)))) 103 | 104 | (defmethod sink:configuration-class ((proto proto-misp)) 105 | 'misp-server) 106 | 107 | (defmethod sink:parse-sink-attribute 108 | ((impl proto-misp) (attribute (eql :server)) &rest args) 109 | (list* attribute 110 | (sink:check-args impl attribute args 111 | '((:type string :transform puri:parse-uri))))) 112 | 113 | (defmethod sink:parse-sink-attribute 114 | ((impl proto-misp) (attribute symbol) &rest args) 115 | (let ((string-attrs '(:api-key :ca-cert :user-cert :user-key :user-key-pass))) 116 | (cond ((member attribute string-attrs) 117 | (list* attribute 118 | (sink:check-args impl attribute args '((:type string))))) 119 | (t 120 | (call-next-method))))) 121 | 122 | (defmethod sink:attribute-value ((cfg proto-misp) (attribute (eql :alert))) 123 | ;; Default value for :alert parameter. 124 | nil) 125 | 126 | (defmethod sink:parse-sink-attribute 127 | ((impl proto-misp) (attribute (eql :alert)) &rest args) 128 | (list* attribute (sink:check-args impl attribute args '((:type boolean))))) 129 | 130 | (defmethod sink:attribute-value ((cfg proto-misp) (attribute (eql :publish))) 131 | ;; Default value for :publish parameter. 132 | t) 133 | 134 | (defmethod sink:parse-sink-attribute 135 | ((impl proto-misp) (attribute (eql :publish)) &rest args) 136 | (list* attribute (sink:check-args impl attribute args '((:type boolean))))) 137 | 138 | (defmethod sink:parse-sink-attribute 139 | ((impl proto-misp) (attribute (eql :sharing-group)) &rest args) 140 | (list* attribute (sink:check-args impl attribute args '((:type string))))) 141 | 142 | (defmethod filter:generate-filter-function 143 | ((op (eql (cfg:user-identifier "UNIQUE-COUNT"))) &rest body) 144 | (check-type body (cons symbol null)) 145 | (let ((class (first body))) 146 | (filter:make-function unique-count (sink cont) 147 | (let* ((groups (sink:group-artefacts sink)) 148 | (group (assoc class groups))) 149 | (funcall cont 150 | (if group 151 | (hash-table-count (second group)) 152 | 0)))))) 153 | 154 | (defmethod sink:parse-sink-attribute 155 | ((impl proto-misp) (attribute (eql :title)) &rest args) 156 | (list attribute (util:parse-user-template args))) 157 | 158 | (defmethod sink:parse-action ((impl proto-misp) 159 | (scope (eql :document)) 160 | (action (eql (user-identifier "ADD-TAGS"))) 161 | &rest args) 162 | (loop for tag in args 163 | unless (stringp tag) 164 | do (error 'sink:invalid-attribute-type 165 | :sink impl 166 | :attribute :document-action 167 | :typespec 'string 168 | :value tag)) 169 | (lambda (document &key misp event-id uuid) 170 | (declare (ignore document event-id)) 171 | (dolist (tag args) 172 | (add-tag misp uuid tag)))) 173 | 174 | (defun make-action/add-attribute (&key category type value comment) 175 | (flet ((attribute-retriever (name datum) 176 | (let ((parsed (util:parse-dynamic-attribute datum name))) 177 | (if (functionp parsed) 178 | parsed 179 | (lambda (context) 180 | (declare (ignore context)) 181 | parsed))))) 182 | (let ((value (attribute-retriever :value value)) 183 | (comment (attribute-retriever :comment comment))) 184 | (lambda (item &key misp event-id uuid) 185 | (declare (ignore uuid)) 186 | (add-event-attribute misp event-id category type 187 | (funcall value item) 188 | (funcall comment item)))))) 189 | 190 | (defmethod sink:parse-action ((impl proto-misp) 191 | (scope (eql :document)) 192 | (action (eql (user-identifier "ADD-ATTRIBUTE"))) 193 | &rest args) 194 | (apply #'make-action/add-attribute args)) 195 | 196 | (defmethod sink:parse-action ((impl proto-misp) 197 | (scope (eql :item)) 198 | (action (eql (user-identifier "ADD-ATTRIBUTE"))) 199 | &rest args) 200 | (apply #'make-action/add-attribute args)) 201 | 202 | (defmethod sink:finish-sink 203 | ((proto proto-misp) (sink sink:sink)) 204 | (let* ((date (multiple-value-bind (sec min hour date month year) 205 | (get-decoded-time) 206 | (declare (ignore sec min hour)) 207 | (format nil "~4,'0D-~2,'0D-~2,'0D" year month date))) 208 | (title (sink:attribute-value sink :title)) 209 | (misp (sink:sink-configuration sink))) 210 | (with-keepalive 211 | ;; FIXME: The :sharing-group configuration parameter should be 212 | ;; renamed to :distribution. A named :sharing-group could be 213 | ;; just one option. There are also other parameters (like 214 | ;; :analysis and :threat-level) that need to be handled. 215 | (let* ((sharing-group (sink:attribute-value sink :sharing-group)) 216 | (sharing-group-id (find-sharing-group-id misp sharing-group))) 217 | (multiple-value-bind (event-id uuid) 218 | (add-event misp title 219 | :sharing-group sharing-group-id 220 | :date date) 221 | (sink:run-document-actions proto sink 222 | :misp misp 223 | :event-id event-id 224 | :uuid uuid) 225 | ;; XXX: We only want to do this for _unique_ artefacts. 226 | ;; This is probalby where the :deduplicate configuration 227 | ;; option comes in. 228 | (sink:run-item-actions proto sink 229 | :misp misp 230 | :event-id event-id 231 | :uuid uuid) 232 | (when (sink:attribute-value sink :publish) 233 | (publish-event misp event-id 234 | :alert (sink:attribute-value sink :alert)))))))) 235 | -------------------------------------------------------------------------------- /src/package.lisp: -------------------------------------------------------------------------------- 1 | (defpackage #:pastelyzer.log 2 | (:use #:common-lisp) 3 | (:local-nicknames (#:cl-log #:com.ravenbrook.common-lisp-log)) 4 | (:import-from #:alexandria 5 | #:when-let) 6 | (:export #:msg 7 | #:maybe-log 8 | #:known-log-category-p 9 | #:setup-logging 10 | #:with-logged-warnings 11 | #:plain-message 12 | #:timestamped-message)) 13 | 14 | (defpackage #:pastelyzer.db 15 | (:use #:common-lisp) 16 | (:import-from #:pastelyzer.log 17 | #:msg) 18 | (:import-from #:postmodern 19 | #:with-transaction) 20 | (:local-nicknames (#:pg #:cl-postgres)) 21 | (:export #:*db-params* 22 | #:*current-version-id* 23 | #:initialize 24 | #:with-connection 25 | #:with-auto-reconnect 26 | #:call-with-auto-reconnect 27 | #:with-transaction 28 | #:insert-content-fix 29 | #:register-broken-content 30 | #:insert-content 31 | #:store-content 32 | #:content-body 33 | #:insert-paste 34 | #:store-paste 35 | #:get-paste 36 | #:paste-with-content 37 | #:initiate-analysis 38 | #:finish-analysis 39 | #:flush-content-artefacts 40 | #:register-artefact 41 | #:map-unprocessed-content-ids 42 | #:content-artefacts 43 | #:content-sources 44 | #:search-artefacts 45 | #:search-artefacts-multi 46 | 47 | #:congested-content 48 | #:congested-content-id)) 49 | 50 | (defpackage #:pastelyzer.util 51 | (:use #:common-lisp) 52 | (:import-from #:alexandria 53 | #:array-index 54 | #:array-length 55 | #:when-let) 56 | (:export #:alphabet 57 | #:sub-alphabet-p 58 | #:histogram 59 | #:show-histogram 60 | #:entropy 61 | #:group 62 | #:partition 63 | #:trim-space 64 | #:whitespace-char-p 65 | #:one-line 66 | #:string-context-before 67 | #:string-context-after 68 | #:dsubseq 69 | #:starts-with-subseq 70 | #:ends-with-subseq 71 | #:mixed-case-p 72 | #:summarize-numbers)) 73 | 74 | (defpackage #:pastelyzer 75 | (:use #:common-lisp) 76 | (:import-from #:alexandria 77 | #:array-index 78 | #:array-length 79 | #:compose 80 | #:define-constant 81 | #:delete-from-plist 82 | #:if-let 83 | #:hash-table-values 84 | #:read-file-into-byte-vector 85 | #:read-stream-content-into-byte-vector 86 | #:read-stream-content-into-string 87 | #:when-let 88 | #:when-let* 89 | #:with-gensyms) 90 | (:import-from #:split-sequence 91 | #:split-sequence) 92 | (:import-from #:string-case 93 | #:string-case) 94 | (:import-from #:pastelyzer.log 95 | #:msg 96 | #:maybe-log 97 | #:setup-logging 98 | #:with-logged-warnings) 99 | (:import-from #:pastelyzer.util 100 | #:alphabet 101 | #:sub-alphabet-p 102 | #:group 103 | #:whitespace-char-p 104 | #:one-line 105 | #:string-context-after 106 | #:string-context-before 107 | #:dsubseq 108 | #:starts-with-subseq 109 | #:ends-with-subseq 110 | #:summarize-numbers) 111 | #+sbcl 112 | (:import-from #:sb-concurrency 113 | #:make-mailbox 114 | #:mailbox-empty-p 115 | #:receive-message 116 | #:send-message) 117 | (:local-nicknames (#:pg #:cl-postgres) 118 | (#:ht #:hunchentoot) 119 | (#:hs #:hunchensocket) 120 | (#:sq #:cl-speedy-queue) 121 | (#:db #:pastelyzer.db)) 122 | (:export #:run 123 | #:run-standalone 124 | #:process 125 | 126 | #:binary-fragment 127 | #:fragment 128 | #:string-fragment 129 | #:fragment-body 130 | 131 | #:artefact 132 | #:artefact-source 133 | #:artefact-source-seq 134 | #:artefact-source-seq-start 135 | #:artefact-source-seq-end 136 | #:artefact-source-seq-bounds 137 | #:artefact-description 138 | #:artefact-context-before 139 | #:artefact-context-after 140 | #:artefact-key 141 | #:artefact-note 142 | #:important-artefact-p 143 | #:string-artefact 144 | #:m3u-entry 145 | #:windows-internal 146 | #:email 147 | #:credential 148 | #:credential-username 149 | #:credential-passphrase 150 | #:ip-address 151 | #:ip-service 152 | #:ip-service-address 153 | #:ip-service-port 154 | #:resolved-ip-address 155 | #:domain 156 | #:onion 157 | #:uri 158 | #:bank-card-number 159 | #:bank-card-number-digits 160 | 161 | #:binary-artefact 162 | #:compressed-blob 163 | #:compressed-blob-bytes 164 | #:compressed-blob-method 165 | #:encoded-string 166 | #:encoded-string-encoding 167 | 168 | #:broken-fragment 169 | #:broken-fragment-datum 170 | #:broken-fragment-locations 171 | #:broken-utf-8 172 | #:artefact-discarded 173 | 174 | #:embedded-binary 175 | #:embedded-binary-bytes 176 | #:binary-blob 177 | #:hex-blob 178 | #:base64-blob 179 | 180 | #:job 181 | #:job-subject 182 | #:job-artefacts 183 | #:register-artefact 184 | #:finish-job 185 | #:resolve-domains-p 186 | 187 | #:content 188 | #:content-id 189 | #:content-body 190 | 191 | #:paste 192 | #:paste-id 193 | #:paste-provider 194 | #:paste-provider-id 195 | #:circl-paste 196 | #:web-paste 197 | #:paste-source 198 | #:paste-origins 199 | #:remote-content 200 | #:remote-content-location 201 | #:http-content 202 | #:link-only-http-content 203 | 204 | #:async 205 | #:*announcers*)) 206 | 207 | (defpackage #:fmt 208 | (:use #:common-lisp) 209 | (:export #:bytes 210 | #:nbytes)) 211 | -------------------------------------------------------------------------------- /src/rest.lisp: -------------------------------------------------------------------------------- 1 | (defpackage #:pastelyzer.rest 2 | (:use #:common-lisp) 3 | (:export #:rest-call 4 | #:json-call 5 | #:jsown-call 6 | #:with-keepalive)) 7 | 8 | (in-package #:pastelyzer.rest) 9 | 10 | (defvar *user-agent* "Pastelyzer") 11 | 12 | (defvar *reuse-connection* nil) 13 | 14 | (defmacro with-keepalive (&body body) 15 | "Requests in the scope reuse connection (if possible)." 16 | `(let ((*reuse-connection* t)) 17 | ,@body)) 18 | 19 | (defun rest-call (uri method path &rest keys 20 | &key user-agent 21 | &allow-other-keys) 22 | (let ((uri (puri:merge-uris path uri))) 23 | (multiple-value-bind (body status headers real-uri stream closep reason) 24 | (apply #'drakma:http-request uri 25 | :method method 26 | :close (if *reuse-connection* nil t) 27 | :keep-alive (if *reuse-connection* t nil) 28 | :stream (if (streamp *reuse-connection*) *reuse-connection* nil) 29 | :user-agent (or user-agent *user-agent*) 30 | keys) 31 | (declare (ignore headers real-uri)) 32 | (unwind-protect 33 | (cond ((= 200 status) 34 | body) 35 | (t 36 | (error "~A: ~S (~A)" uri status reason))) 37 | (cond (closep 38 | (close stream)) 39 | (*reuse-connection* 40 | (setq *reuse-connection* stream))))))) 41 | 42 | (defun json-call (uri method path &rest keys) 43 | (let ((drakma:*text-content-types* 44 | (list* '("application" . "json") drakma:*text-content-types*))) 45 | (declare (dynamic-extent drakma:*text-content-types*)) 46 | (apply #'rest-call uri method path 47 | :content-type "application/json" 48 | :accept "application/json" 49 | keys))) 50 | 51 | (defun jsown-call (uri method path &rest keys 52 | &key content 53 | &allow-other-keys) 54 | (let* ((body (apply #'json-call uri method path 55 | :content (when content (jsown:to-json content)) 56 | keys)) 57 | (result (jsown:parse body))) 58 | (if (and (jsown:keyp result "errors") 59 | (jsown:val result "errors")) 60 | (error "~A to ~A~@[ with ~S~] failed: ~S" 61 | method path content (jsown:val result "errors")) 62 | result))) 63 | -------------------------------------------------------------------------------- /src/sys.lisp: -------------------------------------------------------------------------------- 1 | (defpackage #:sys 2 | (:use :common-lisp) 3 | #+sbcl 4 | (:import-from #:sb-posix 5 | #:mkdtemp 6 | #:mkstemp) 7 | #+ccl 8 | (:import-from #:ccl 9 | #:delete-directory) 10 | (:export #:isatty 11 | #:mkdtemp 12 | #:mkstemp 13 | #:delete-directory 14 | #:open-tmpfile 15 | #:run-program 16 | #:with-temporary-directory 17 | #:with-temporary-file 18 | 19 | #:*tmpdir* 20 | #:*home* 21 | #:initialize-directories)) 22 | 23 | (in-package #:sys) 24 | 25 | (defvar *tmpdir* #p"/tmp/" 26 | "Where temporary files and directories will be created.") 27 | 28 | (defvar *home* #p"" 29 | "Pastelyzer home directory.") 30 | 31 | (defun initialize-directories (&key home tmp) 32 | (setq *home* (probe-file (or home "./"))) 33 | (setq *tmpdir* (or tmp 34 | (let ((path (uiop:getenv "TMPDIR"))) 35 | (when path 36 | (pathname path))) 37 | (probe-file "/tmp/") 38 | *home*))) 39 | 40 | #+sbcl 41 | (defgeneric isatty (stream) 42 | (:method ((other t)) 43 | nil) 44 | (:method ((fd integer)) 45 | (not (zerop (sb-unix:unix-isatty fd)))) 46 | (:method ((stream synonym-stream)) 47 | (isatty (symbol-value (synonym-stream-symbol stream)))) 48 | (:method ((stream sb-sys:fd-stream)) 49 | (sb-sys:fd-stream-fd stream))) 50 | 51 | #+ccl 52 | (defgeneric isatty (stream) 53 | (:method ((other t)) 54 | nil) 55 | (:method ((fd integer)) 56 | (ccl::isatty fd)) 57 | (:method ((stream synonym-stream)) 58 | (isatty (symbol-value (synonym-stream-symbol stream)))) 59 | (:method ((stream stream)) 60 | (isatty (ccl::stream-device stream nil))) 61 | (:method ((stream ccl::two-way-stream)) 62 | (and (isatty (ccl::stream-device stream :input)) 63 | (isatty (ccl::stream-device stream :output))))) 64 | 65 | #-(or sbcl ccl) 66 | (defun isatty (stream) 67 | (declare (ignore stream)) 68 | nil) 69 | 70 | #+ccl 71 | (defun mkstemp (template) 72 | (ccl:with-filename-cstrs ((cstr template)) 73 | (let ((fd (#_mkstemp cstr))) 74 | (if (< fd 0) 75 | (error (ccl::%strerror (ccl::%get-errno))) 76 | (values fd (ccl:get-foreign-namestring cstr)))))) 77 | 78 | #+ccl 79 | (defun mkdtemp (template) 80 | (ccl:with-filename-cstrs ((cstr template)) 81 | (let ((ptr (#_mkdtemp cstr))) 82 | (if (ccl:%null-ptr-p ptr) 83 | (error (ccl::%strerror (ccl::%get-errno))) 84 | (ccl:get-foreign-namestring ptr))))) 85 | 86 | #+sbcl 87 | (defun delete-directory (path) 88 | (sb-ext:delete-directory path :recursive t)) 89 | 90 | (defmacro with-temporary-directory ((var &key (auto-cleanup t) 91 | (in *tmpdir*) 92 | (prefix "pastelyzer-")) 93 | &body body 94 | &environment env) 95 | (let ((namestring (gensym "NAMESTRING")) 96 | (name-form (if (constantp prefix env) 97 | (concatenate 'base-string prefix "XXXXXX") 98 | `(concatenate 'base-string ,prefix "XXXXXX")))) 99 | `(let* ((,namestring 100 | (sys:mkdtemp 101 | (namestring (merge-pathnames (make-pathname :name ,name-form) 102 | ,in)))) 103 | (,var (probe-file ,namestring))) 104 | ,(if auto-cleanup 105 | `(unwind-protect 106 | (progn ,@body) 107 | (sys:delete-directory ,var)) 108 | `(multiple-value-prog1 109 | (progn ,@body) 110 | (sys:delete-directory ,var)))))) 111 | 112 | (defun %make-fd-stream (fd &key (direction :io) 113 | (element-type :default) 114 | (external-format :utf-8) 115 | (auto-close t) 116 | path) 117 | #+sbcl 118 | (sb-sys:make-fd-stream fd 119 | :pathname path 120 | :input (or (eq :input direction) 121 | (eq :io direction)) 122 | :output (or (eq :output direction) 123 | (eq :io direction)) 124 | :element-type element-type 125 | :external-format external-format 126 | :auto-close auto-close) 127 | #+ccl 128 | (let* ((args (case element-type 129 | (:default 130 | (list :element-type '(unsigned-byte 8) 131 | :character-p t)) 132 | (t 133 | (list :element-type element-type)))) 134 | (result 135 | (apply #'ccl::make-fd-stream 136 | fd 137 | :class 'ccl::basic-file-stream 138 | :direction direction 139 | :encoding external-format 140 | :auto-close auto-close 141 | :sharing :external 142 | args))) 143 | (when path 144 | (setf (ccl::stream-filename result) path)) 145 | result) 146 | #-(or sbcl ccl) 147 | (error "Don't know how to make an FD-stream in ~A" 148 | (lisp-implementation-type))) 149 | 150 | (defun open-tmpfile (directory prefix 151 | &rest args 152 | &key (direction :io) 153 | (element-type :default) 154 | (external-format :utf-8) 155 | (auto-close t)) 156 | (declare (ignorable direction element-type external-format auto-close)) 157 | (let* ((name (concatenate 'base-string prefix "XXXXXX")) 158 | (path (merge-pathnames (make-pathname :name name) 159 | directory))) 160 | (multiple-value-bind (fd real-path) 161 | (sys:mkstemp (namestring path)) 162 | (apply #'%make-fd-stream fd :path (pathname real-path) args)))) 163 | 164 | (defun call-with-temporary-file-stream (function &key template element-type) 165 | (let ((path (namestring template))) 166 | (assert (let ((mm (mismatch "XXXXXX" path :from-end t))) 167 | (or (null mm) (zerop mm)))) 168 | (multiple-value-bind (fd real-path) 169 | (sys:mkstemp path) 170 | (let ((stream (%make-fd-stream fd :direction :output 171 | :element-type element-type 172 | :path (pathname real-path)))) 173 | (unwind-protect 174 | (funcall function stream) 175 | (close stream)))))) 176 | 177 | (defmacro with-temporary-file ((var &key directory element-type 178 | (prefix "pastelyzer-")) 179 | &body body &environment env) 180 | (let* ((name-form 181 | (if (constantp prefix env) 182 | (concatenate 'string prefix "XXXXXX") 183 | `(concatenate 'string (or ,prefix "pastelyzer-") "XXXXXX"))) 184 | (template-form 185 | (if directory 186 | `(merge-pathnames (make-pathname :name ,name-form) 187 | ,directory) 188 | `(make-pathname :name ,name-form)))) 189 | `(call-with-temporary-file-stream (lambda (,var) ,@body) 190 | :template ,template-form 191 | :element-type ,element-type))) 192 | #+sbcl 193 | (defun run-program (command 194 | &key environment stdin stdout stderr directory timeout) 195 | (declare (ignore timeout)) 196 | (check-type stdin (or null pathname)) 197 | (check-type stdout (or null stream)) 198 | (check-type stderr (or null stream)) 199 | (check-type directory (or null pathname)) 200 | (let* ((env (mapcar (lambda (cons) 201 | (concatenate 'string (car cons) "=" (cdr cons))) 202 | environment)) 203 | (proc (sb-ext:run-program (first command) 204 | (rest command) 205 | :search t 206 | :directory directory 207 | :environment env 208 | :input stdin 209 | :output stdout 210 | :error stderr 211 | :wait t))) 212 | (when stdout 213 | (finish-output stdout)) 214 | (when stderr 215 | (finish-output stderr)) 216 | (sb-ext:process-exit-code proc))) 217 | 218 | #+ccl 219 | (defun run-program (command 220 | &key environment stdin stdout stderr directory timeout) 221 | (declare (ignore timeout)) 222 | (check-type stdin (or null pathname)) 223 | (check-type stdout (or null stream)) 224 | (check-type stderr (or null stream)) 225 | (check-type directory (or null pathname)) 226 | (let ((proc (ccl:run-program (first command) 227 | (rest command) 228 | :directory directory 229 | :env environment 230 | :input stdin 231 | :output stdout 232 | :error stderr 233 | :wait t))) 234 | (when stdout 235 | (finish-output stdout)) 236 | (when stderr 237 | (finish-output stderr)) 238 | (ccl:external-process-status proc))) 239 | -------------------------------------------------------------------------------- /src/util.lisp: -------------------------------------------------------------------------------- 1 | (in-package #:pastelyzer.util) 2 | 3 | (defgeneric histogram (source)) 4 | (defgeneric alphabet (source)) 5 | 6 | (defmethod histogram ((source string)) 7 | (let ((table (make-hash-table :test 'eql :size 67))) 8 | (loop for char across source 9 | do (incf (gethash (char-code char) table 0))) 10 | table)) 11 | 12 | (defmethod histogram ((source vector)) 13 | (let ((table (make-hash-table :test 'eql :size 67))) 14 | (loop for byte across source 15 | do (incf (gethash byte table 0))) 16 | table)) 17 | 18 | (defmethod histogram ((source stream)) 19 | (let ((table (make-hash-table :test 'eql :size 67))) 20 | (etypecase (stream-element-type source) 21 | (character 22 | (loop for char = (read-char source nil nil) 23 | while char 24 | do (incf (gethash (char-code char) table 0)))) 25 | ((vector (unsigned-byte 8)) 26 | (loop for byte = (read-byte source nil nil) 27 | while byte 28 | do (incf (gethash byte table 0))))) 29 | table)) 30 | 31 | (defmethod histogram ((source pathname)) 32 | (with-open-file (in source :direction :input :element-type 'character) 33 | (histogram in))) 34 | 35 | ;;; XXX: Would be nice if we could make this into a function that can 36 | ;;; be used with FORMAT. 37 | (defun bar (stream width n max &optional (graphics "▏▎▍▌▋▊▉█")) 38 | "Draw an N/MAX bar no wider than WIDTH using charecters from GRAPHICS." 39 | (declare (type number width n max) 40 | (type string graphics)) 41 | (assert (<= 1 (length graphics)) 42 | (graphics) 43 | "Invalid graphics provided: ~S" graphics) 44 | (let* ((nchars (length graphics)) 45 | (inc (/ 1 nchars 2))) 46 | (multiple-value-bind (full part) 47 | (truncate (* width (/ n max))) 48 | (loop with char = (char graphics (1- nchars)) 49 | repeat full 50 | do (write-char char stream)) 51 | (multiple-value-bind (index rem) 52 | (truncate (* nchars (- part inc))) 53 | (when (<= 0 rem) 54 | (write-char (char graphics index) stream))))) 55 | nil) 56 | 57 | (defmethod show-histogram ((table cons) 58 | &key (stream *standard-output*) 59 | (width (or *print-right-margin* 80)) 60 | (sort #'<) 61 | (key-base 10) 62 | (graphics "▏▎▍▌▋▊▉█") 63 | ignore-keys) 64 | ;; Alterantive graphics: "⠆⡇⡷⣿", "⣀⣄⣤⣦⣶⣷⣿". 65 | (let* ((entries (cond (ignore-keys 66 | (remove-if (lambda (item) 67 | (find item ignore-keys)) 68 | table 69 | :key #'car)) 70 | (t 71 | (copy-list table)))) 72 | (max-key (reduce #'max entries :key #'car :initial-value 0)) 73 | (max-count (reduce #'max entries :key #'cdr :initial-value 0))) 74 | 75 | (when entries 76 | (setf entries (sort entries sort :key #'car)) 77 | 78 | (fresh-line stream) 79 | (loop with count-width = (ceiling (log (1+ max-count) 10)) 80 | with key-width = (ceiling (log (1+ max-key) key-base)) 81 | with room = (max 1 (- width key-width 3 count-width 1)) 82 | with fmt = (ecase key-base 83 | (2 (formatter "~V,'0B ~:[~* ~;~C~] ~VD ")) 84 | (8 (formatter "~V,'0O ~:[~* ~;~C~] ~VD ")) 85 | (10 (formatter "~V,' D ~:[~* ~;~C~] ~VD ")) 86 | (16 (formatter "~V,'0X ~:[~* ~;~C~] ~VD "))) 87 | for (key . count) in entries 88 | for char = (code-char key) 89 | do (funcall fmt stream 90 | key-width key 91 | (graphic-char-p char) char 92 | count-width count) 93 | (bar stream room count max-count graphics) 94 | (terpri stream))) 95 | entries)) 96 | 97 | (defmethod show-histogram ((table hash-table) 98 | &rest keys 99 | &key stream width sort key-base ignore-keys graphics) 100 | (declare (ignorable stream width sort key-base ignore-keys graphics)) 101 | (apply #'show-histogram 102 | (alexandria:hash-table-alist table) 103 | keys)) 104 | 105 | (defmethod alphabet ((source string)) 106 | (alphabet (histogram source))) 107 | 108 | (defmethod alphabet ((source hash-table)) 109 | (let ((result (make-string (hash-table-count source)))) 110 | (loop for key being each hash-key of source 111 | for i upfrom 0 112 | do (setf (schar result i) (code-char key))) 113 | (sort result #'char<))) 114 | 115 | (defun sub-alphabet-p (string1 string2 &aux (len1 (length string1)) 116 | (len2 (length string2))) 117 | "Returns T if STRING1 is a sub-alphabet of STRING2. The strings are 118 | assumed to be sorted." 119 | (declare (type string string1 string2)) 120 | (let ((i 0) 121 | (j 0)) 122 | (declare (type array-length i j)) 123 | (loop 124 | (cond ((= i len1) 125 | (return t)) 126 | ((= j len2) 127 | (return nil)) 128 | ((char= (char string1 i) 129 | (char string2 j)) 130 | (incf i))) 131 | (incf j)))) 132 | 133 | (defun entropy (seq) 134 | (let ((table (make-hash-table)) 135 | (length (coerce (length seq) 'float))) 136 | (map nil (lambda (x) 137 | (incf (the fixnum (gethash x table 0)))) 138 | seq) 139 | (- (loop for freq fixnum being each hash-value in table 140 | for q of-type (float 0.0 1.0) = (/ freq length) 141 | sum (the float (* q (log q 2))))))) 142 | 143 | (defun group (list &key ((:key key-fn) #'identity) 144 | ((:test test-fn) #'eql)) 145 | (declare (type (or symbol function) key-fn test-fn)) 146 | (let ((result ())) 147 | (dolist (item list result) 148 | (let* ((key (funcall key-fn item)) 149 | (cons (assoc key result :test test-fn))) 150 | (cond (cons 151 | (push item (cdr cons))) 152 | (t 153 | (setf result (acons key (list item) result)))))))) 154 | 155 | (defun partition (list fn) 156 | (loop for item in list 157 | when (funcall fn item) 158 | collect item into a 159 | else 160 | collect item into b 161 | finally (return (values a b)))) 162 | 163 | (defparameter *whitespace-chars* 164 | (coerce '(#\space #\linefeed #\tab #\return #\page) 'base-string)) 165 | 166 | (defun whitespace-char-p (char) 167 | (find char *whitespace-chars*)) 168 | 169 | (defun trim-space (string side &optional (chars *whitespace-chars*)) 170 | (ecase side 171 | (:both (string-trim chars string)) 172 | (:left (string-left-trim chars string)) 173 | (:right (string-right-trim chars string)))) 174 | 175 | (defun visible-char-p (char) 176 | (and (graphic-char-p char) 177 | (case char 178 | ((#\tab #\linefeed #\return #\page) 179 | nil) 180 | (otherwise 181 | t)))) 182 | 183 | (defun substitute-invisible (string replacement) 184 | (declare (type (or null character) replacement)) 185 | (if replacement 186 | (nsubstitute-if-not replacement #'visible-char-p string) 187 | string)) 188 | 189 | (defun one-line (string &key (start 0) 190 | (end nil) 191 | (limit 24) 192 | (replace-invisible #\.) 193 | (continuation "...") 194 | (mode :shorten)) 195 | (declare (type string string) 196 | (type (and fixnum unsigned-byte) start limit) 197 | (type (or null character) replace-invisible)) 198 | (when (null end) 199 | (setq end (length string))) 200 | (flet ((clean (start end) 201 | (substitute-invisible (subseq string start end) replace-invisible))) 202 | (if (<= (- end start) limit) 203 | (clean start end) 204 | (ecase mode 205 | (:shorten 206 | (concatenate 'string 207 | (clean start (+ start limit)) 208 | continuation)) 209 | (:squeeze 210 | (let ((half (truncate limit 2))) 211 | (concatenate 'string 212 | (clean start (+ start half (if (oddp limit) 1 0))) 213 | continuation 214 | (clean (- end half) end)))))))) 215 | 216 | (defun string-context-before (string position &key (after 0) 217 | (limit 50) 218 | (bol nil) 219 | (trim-space t) 220 | (replace-invisible #\.)) 221 | (let ((start (if limit 222 | (max after (- position limit)) 223 | after))) 224 | (when bol 225 | (when-let (pos (position #\newline string :start start :end position 226 | :from-end t)) 227 | (setq start (1+ pos)))) 228 | (when trim-space 229 | (when-let (pos (position-if-not #'whitespace-char-p string 230 | :start start :end position)) 231 | (setq start pos))) 232 | (substitute-invisible (subseq string start position) replace-invisible))) 233 | 234 | (defun string-context-after (string position &key (before nil) 235 | (limit 50) 236 | (eol nil) 237 | (trim-space t) 238 | (replace-invisible #\.)) 239 | (let* ((before (if before before (length string))) 240 | (end (if limit 241 | (min before (+ position limit)) 242 | before))) 243 | (when eol 244 | (when-let (pos (position #\newline string :start position :end end)) 245 | (setq end pos))) 246 | (when trim-space 247 | (when-let (pos (position-if-not #'whitespace-char-p string 248 | :start position :end end 249 | :from-end t)) 250 | (setq end (1+ pos)))) 251 | (substitute-invisible (subseq string position end) replace-invisible))) 252 | 253 | (defun dsubseq (array start end) 254 | (declare (type array-index start end)) 255 | (make-array (- end start) 256 | :element-type (array-element-type array) 257 | :displaced-to array 258 | :displaced-index-offset start)) 259 | 260 | (defun starts-with-subseq (prefix sequence &rest keys) 261 | (let ((mismatch (apply #'mismatch prefix sequence keys))) 262 | (or (null mismatch) 263 | (= (length prefix) mismatch)))) 264 | 265 | (defun ends-with-subseq (suffix sequence &rest keys) 266 | (let ((mismatch (apply #'mismatch suffix sequence :from-end t keys))) 267 | (or (null mismatch) 268 | (zerop mismatch)))) 269 | 270 | (defun mixed-case-p (string &optional (start 0) (end nil)) 271 | (loop with lower fixnum = 0 272 | with upper fixnum = 0 273 | for index of-type array-index from start below (or end (length string)) 274 | for char of-type character = (char string index) 275 | when (both-case-p char) 276 | do (if (lower-case-p char) 277 | (incf lower) 278 | (incf upper)) 279 | (when (and (< 0 lower) (< 0 upper)) 280 | (return t)) 281 | finally (return nil))) 282 | 283 | (defun summarize-numbers (numbers) 284 | "NUMBERS is assumed to be sorted (order does not matter)." 285 | (with-output-to-string (out) 286 | (flet ((range (stream start end) 287 | (format stream "~:[~D-~;~*~]~D" (= start end) start end))) 288 | (prog ((start nil) 289 | (last nil) 290 | (n 0)) 291 | (go first) 292 | next 293 | (setq last n) 294 | first 295 | (when (null numbers) 296 | (go done)) 297 | (setq n (pop numbers)) 298 | (when (null start) 299 | (setq start n) 300 | (go next)) 301 | (when (<= -1 (- last n) 1) 302 | (go next)) 303 | (range out start last) 304 | (write-string ", " out) 305 | (setq start n) 306 | (go next) 307 | done 308 | (cond (start 309 | (range out start last)) 310 | (last 311 | (princ last out))) 312 | (return))))) 313 | -------------------------------------------------------------------------------- /src/vars.lisp: -------------------------------------------------------------------------------- 1 | (in-package #:pastelyzer) 2 | 3 | ;;; System release version. 4 | (defvar *release* "0.11") 5 | 6 | ;;; Set by build script. 7 | (defvar *build-id* nil) 8 | 9 | (defvar *circl-zmq-address*) 10 | 11 | (defvar *resolve-domains* nil 12 | "Whether to resolve domains.") 13 | 14 | (defvar *log-artefacts-threshold* 3 15 | "Include artefacts in the log output if there are fewer than this 16 | number of them") 17 | 18 | (defvar *acceptor* nil) 19 | 20 | (defvar *web-server-external-uri* 21 | (make-instance 'puri:uri :scheme :http :host "localhost")) 22 | 23 | ;; XXX: This is a hack to avoid resolving same domains over and over 24 | ;; again (in the scope of a single paste for now; see PROCESS generic 25 | ;; function). Until we have our own resolver in place. 26 | (defvar *seen-hostnames* nil) 27 | 28 | (defvar *big-fragment-bytes* (* 1024 1024) 29 | "Size of a fragment that is considered big and is processed in a 30 | separate queue.") 31 | 32 | (defvar *huge-fragment-bytes* (* 16 1024 1024) 33 | "Size of a fragment that is considered too big to process.") 34 | 35 | (defvar *ignored-paste-sites* 36 | '() 37 | "Paste sites to ignore when re-fetching broken pastes.") 38 | 39 | (defvar *default-http-user-agent* nil) 40 | -------------------------------------------------------------------------------- /support/pastelyzer.service: -------------------------------------------------------------------------------- 1 | ; -*- mode: ini -*- 2 | 3 | [Unit] 4 | Description="Pastelyzer" 5 | After=postgresql.service 6 | ; Pastelyzer can survive temporary loss of PostgreSQL, and does not 7 | ; have to be brought down when PostgreSQL is being restarted. 8 | ; Requires=postgresql.service 9 | 10 | [Service] 11 | WorkingDirectory=%h/ 12 | Environment=DB_NAME=pastelyzer 13 | Environment=DB_PASS=XXXXXXXXXX 14 | Environment=SMTP_SERVER=localhost 15 | Environment=SMTP_FROM=pastelyzer@your.org 16 | Environment=MISP_SERVER=https://misp.your.org 17 | Environment=MISP_API_KEY=XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX 18 | Environment=MISP_CA_CERT=%h/misp/ca.pem 19 | Environment=MISP_USER_CERT=%h/misp/misp.crt.pem 20 | Environment=MISP_USER_KEY=%h/misp/misp.key.pem 21 | Environment=MISP_USER_KEY_PASS=XXXXXXXXXX 22 | Environment=IGNORED_PASTESITES=pastebin.com 23 | Environment="HTTP_USER_AGENT=Mozilla/5.0 (Windows NT 10.0; rv:68.0) Gecko/20100101 Firefox/68.0" 24 | ExecStart=/path/to/pastelyzer \ 25 | --log-level info \ 26 | --networks-file %h/local.net \ 27 | --tlds-file %h/tlds.txt \ 28 | --important-cc-bins %h/cc-bins.txt \ 29 | --interesting-tlds .xx,.yy,.zz \ 30 | --server \ 31 | --server-port 80 \ 32 | --server-ext-host pastelyzer.your.org \ 33 | --config %h/pastelyzer.conf 34 | Restart=always 35 | RestartSec=1 36 | NoNewPrivileges=true 37 | 38 | [Install] 39 | WantedBy=default.target 40 | -------------------------------------------------------------------------------- /tests/circl-paste.lisp: -------------------------------------------------------------------------------- 1 | (in-package #:pastelyzer.tests) 2 | 3 | (suite 'circl-paste) 4 | 5 | (defmacro test-circl-paste ((site id) exp-name exp-url &optional exp-raw-url) 6 | (let ((provider-id (format nil "archive/~A/2001/01/01/~A.gz" site id))) 7 | `(let* ((paste (make-instance 'pastelyzer::circl-paste 8 | :id nil 9 | :content nil 10 | :provider "circl" 11 | :provider-id ,provider-id))) 12 | (multiple-value-bind (name url raw-url) 13 | (pastelyzer::paste-source paste) 14 | (setq url (puri:render-uri url nil)) 15 | (when raw-url 16 | (setq raw-url (puri:render-uri raw-url nil))) 17 | (is (string= ,exp-name name) 18 | "Expected name to be '~A', got '~A'" ,exp-name name) 19 | (is (string= ,exp-url url) 20 | "Expected URL to be '~A', got '~A'" ,exp-url url) 21 | (if ,exp-raw-url 22 | (is (string= ,exp-raw-url raw-url) 23 | "Expected raw URL to be '~A', got '~A'" ,exp-raw-url raw-url) 24 | (is (null raw-url) 25 | "Expected raw URL to be NIL, got '~A'" raw-url)))))) 26 | 27 | (test circl-paste.justpaste.it () 28 | (test-circl-paste ("justpaste.it" "19p4m") 29 | "justpaste.it" 30 | "https://justpaste.it/19p4m")) 31 | 32 | (test circl-paste.pastebin.fr () 33 | (test-circl-paste ("pastebin.fr" "50512") 34 | "pastebin.fr" 35 | "http://pastebin.fr/50512")) 36 | 37 | (test circl-paste.codepad.org () 38 | (test-circl-paste ("codepad.org" "B4d6aOXk") 39 | "codepad.org" 40 | "http://codepad.org/B4d6aOXk")) 41 | 42 | (test circl-paste.paste.kde.org () 43 | (test-circl-paste ("paste.kde.org" "py1tgdbtj") 44 | "paste.kde.org" 45 | "https://paste.kde.org/py1tgdbtj")) 46 | 47 | (test circl-paste.paste.frubar.net () 48 | (test-circl-paste ("paste.frubar.net" "27699") 49 | "paste.frubar.net" 50 | "http://paste.frubar.net/27699")) 51 | 52 | (test circl-paste.paste.org.ru () 53 | (test-circl-paste ("paste.org.ru" "3fjc56") 54 | "paste.org.ru" 55 | "http://paste.org.ru/?3fjc56")) 56 | 57 | (test circl-paste.slexy.org () 58 | (test-circl-paste ("slexy.org" "s2uWkaBEHk") 59 | "slexy.org" 60 | "https://slexy.org/view/s2uWkaBEHk")) 61 | 62 | (test circl-paste.snipplr.com () 63 | (test-circl-paste ("snipplr.com" "311072_oracle-asciistr-function_") 64 | "snipplr.com" 65 | "http://snipplr.com/view/311072/oracle-asciistr-function/")) 66 | 67 | (test circl-paste.gist.github.com () 68 | (test-circl-paste ("gist.github.com" "micmn_dfb1ea9c9b0ccf37f6112b5f1e21a489") 69 | "gist.github.com" 70 | "https://gist.github.com/micmn/dfb1ea9c9b0ccf37f6112b5f1e21a489")) 71 | 72 | (test circl-paste.pastebin.com_pro () 73 | (test-circl-paste ("pastebin.com_pro" "tE9RvHDy") 74 | "pastebin.com" 75 | "https://pastebin.com/tE9RvHDy" 76 | "https://pastebin.com/raw/tE9RvHDy")) 77 | 78 | (test circl-paste.lpaste.net () 79 | (test-circl-paste ("lpaste.net" "357379") 80 | "lpaste.net" 81 | "http://lpaste.net/357379" 82 | "http://lpaste.net/raw/357379")) 83 | 84 | (test circl-paste.paste.debian.net () 85 | (test-circl-paste ("paste.debian.net" "979832") 86 | "paste.debian.net" 87 | "http://paste.debian.net/979832" 88 | "http://paste.debian.net/plain/979832")) 89 | 90 | (test circl-paste.ideone.com () 91 | (test-circl-paste ("ideone.com" "8Z8qre") 92 | "ideone.com" 93 | "http://ideone.com/8Z8qre" 94 | "http://ideone.com/plain/8Z8qre")) 95 | 96 | (test circl-paste.paste.opensuse.org () 97 | (test-circl-paste ("paste.opensuse.org" "99489842") 98 | "paste.opensuse.org" 99 | "http://paste.opensuse.org/99489842" 100 | "http://paste.opensuse.org/view/raw/99489842")) 101 | 102 | (test circl-paste.kpaste.net () 103 | (test-circl-paste ("kpaste.net" "432b8") 104 | "kpaste.net" 105 | "http://kpaste.net/432b8" 106 | "http://kpaste.net/432b8?raw")) 107 | 108 | (test circl-paste.pastebin.ru () 109 | (test-circl-paste ("pastebin.ru" "dPydEwzw") 110 | "pastebin.ru" 111 | "http://pastebin.ru/dPydEwzw" 112 | "http://pastebin.ru/dPydEwzw/d/")) 113 | -------------------------------------------------------------------------------- /tests/config/sets.lisp: -------------------------------------------------------------------------------- 1 | (defpackage #:pastelyzer.tests.config.sets 2 | (:use #:common-lisp #:2am) 3 | (:local-nicknames (#:usr #:pastelyzer.config.user) 4 | (#:sets #:pastelyzer.config.sets)) 5 | (:export #:tests)) 6 | 7 | (in-package #:pastelyzer.tests.config.sets) 8 | 9 | (suite 'tests) 10 | 11 | (test super-domain-set.1 () 12 | (let ((ht (make-hash-table :test 'equal))) 13 | (sets::hashtree-add-path ht '("a") "set a") 14 | (sets::hashtree-add-path ht '("b" "c") "set bc") 15 | 16 | (loop for (value comment) in '((("a") "set a") 17 | (("a" "x") "set a") 18 | (("b" "c") "set bc") 19 | (("b" "c" "x") "set bc")) 20 | do (multiple-value-bind (found note) 21 | (sets::hashtree-present-p ht value) 22 | (is found) 23 | (is (string= comment note)))) 24 | (is (not (sets::hashtree-present-p ht '("b")))))) 25 | 26 | (test super-domain-set.2 () 27 | (let ((ht (make-hash-table :test 'equal))) 28 | (sets::hashtree-add-path ht '("a") ".a") 29 | 30 | (signals warning 31 | (sets::hashtree-add-path ht '("a" "b"))))) 32 | -------------------------------------------------------------------------------- /tests/package.lisp: -------------------------------------------------------------------------------- 1 | (defpackage #:pastelyzer.tests 2 | (:use #:common-lisp #:2am #:pastelyzer) 3 | (:shadowing-import-from #:2am #:run) 4 | (:local-nicknames (#:util #:pastelyzer.util))) 5 | -------------------------------------------------------------------------------- /tests/processing.lisp: -------------------------------------------------------------------------------- 1 | (in-package #:pastelyzer.tests) 2 | 3 | (suite 'processing '(extractors domains)) 4 | 5 | (suite 'extractors) 6 | 7 | (defclass test-job (job) 8 | ((artefacts 9 | :accessor test-job-artefacts 10 | :initform '()))) 11 | 12 | (defmethod register-artefact ((job test-job) (artefact t) (source t)) 13 | (push artefact (test-job-artefacts job)) 14 | artefact) 15 | 16 | (defmethod resolve-domains-p ((job test-job)) 17 | nil) 18 | 19 | (defun extract-artefacts (subject &rest types) 20 | (let* ((fragment (typecase subject 21 | (string 22 | (make-instance 'string-fragment :body subject)) 23 | ((vector (unsigned-byte 8)) 24 | (make-instance 'binary-fragment :body subject)) 25 | (otherwise 26 | subject))) 27 | (job (make-instance 'test-job :subject fragment)) 28 | (result (test-job-artefacts (process job)))) 29 | (values (if types 30 | (let ((classes (mapcar #'find-class types))) 31 | (loop for item in result 32 | when (find (class-of item) classes) 33 | collect item)) 34 | result) 35 | job))) 36 | 37 | (test extractors.credentials.1 () 38 | (let* ((string "There's a user@service.com test email") 39 | (artefacts (extract-artefacts string 'email)) 40 | (artefact (first artefacts))) 41 | (is (= 1 (length artefacts))) 42 | (is (string= "user@service.com" 43 | (artefact-description artefact))))) 44 | 45 | (test extractors.credentials.2 () 46 | ;; Don't allow spaces in passwords until our extractor is smarter. 47 | (let* ((string "Great success: user@service.com:now with spaces!") 48 | (artefacts (extract-artefacts string 'credential)) 49 | (artefact (first artefacts))) 50 | (is (= 1 (length artefacts))) 51 | (is (and (string= "user@service.com" (credential-username artefact)) 52 | (string= "now" (credential-passphrase artefact)))))) 53 | 54 | (test extractors.credentials.3 () 55 | (flet ((test (separator other) 56 | (let* ((exp-pass (format nil "pass~Cword" other)) 57 | (string (format nil "xxx~Cemail@company.com~C~A~Cxxx" 58 | separator separator exp-pass separator)) 59 | (artefacts (extract-artefacts string 'credential)) 60 | (artefact (first artefacts))) 61 | (is (= 1 (length artefacts))) 62 | (is (string= "email@company.com" (credential-username artefact))) 63 | (let ((pass (credential-passphrase artefact))) 64 | (is (string= exp-pass pass) 65 | "With ~C -- expected '~A', got '~A'" 66 | separator exp-pass pass))))) 67 | (test #\: #\;) 68 | (test #\; #\:) 69 | (test #\| #\:) 70 | (test #\tab #\:))) 71 | 72 | (test extractors.credentials.4 () 73 | (let* ((string "FooBar@Service.Com:whatever") 74 | (artefacts (extract-artefacts string 'credential)) 75 | (artefact (first artefacts))) 76 | (is (= 1 (length artefacts))) 77 | (is (string= "FooBar@Service.Com" (credential-username artefact))))) 78 | 79 | (test extractors.credentials.5 () 80 | (dolist (string '("image-file@2x.png" 81 | "for i in 0...@array.size")) 82 | (let ((artefacts (extract-artefacts string 'credential))) 83 | (is (null artefacts))))) 84 | 85 | (test extractors.email.1 () 86 | (let* ((string "XXX.person@test.local. nnThank you, ...") 87 | (emails (extract-artefacts string 'email))) 88 | (is (and (= 1 (length emails)) 89 | (string= "XXX.person@test.local" 90 | (artefact-source (first emails))))))) 91 | 92 | (test extractors.bank-cards.1 () 93 | (flet ((check (digits) 94 | (is (= 1 (length (extract-artefacts digits)))))) 95 | ;; Another number good for testing: 1111-2222-3333-4444. 96 | (check "4242 4242 4242 4242") 97 | (check "4242 4242 4242 4242") 98 | (check "4242-4242-4242-4242") 99 | (check "4242424242424242"))) 100 | 101 | (test extractors.bank-cards.2 () 102 | (flet ((check (digits) 103 | (is (endp (extract-artefacts digits))))) 104 | (check "4242_4242_4242_4242") 105 | (check "4242-4242_4242-4242") 106 | (check "4242424242424242.42") 107 | (check "42.4242424242424242") 108 | (check "42.4242424242424242.42"))) 109 | 110 | (test extractors.bank-cards.3 () 111 | "No real bank would make a bank card with all the digits be ing the 112 | same, right? " 113 | (is (endp (extract-artefacts "1111111111111111")))) 114 | 115 | (test extractors.bank-cards.4 () 116 | "We would like to not fall for hexdumps, but nothing we can do while 117 | we're using regular expressions..." 118 | (let ((string "4242 4242 4242 4242 4242 4242 4242 4242")) 119 | (is (= 2 (length (extract-artefacts string)))))) 120 | 121 | (test extractors.bank-cards.amex.1 () 122 | (flet ((check (digits) 123 | (is (= 1 (length (extract-artefacts digits)))))) 124 | (check "4242 424242 42424") 125 | (check "4242 424242 42424") 126 | (check "4242-424242-42424") 127 | (check "424242424242424"))) 128 | 129 | (test extractors.addresses.1 () 130 | (let* ((artefacts (extract-artefacts "10.0.0.42 8.16.32.127 8.16.32.128" 131 | 'ip-address))) 132 | (is (= 3 (length artefacts))))) 133 | 134 | (test extractors.services.1 () 135 | (let* ((string (format nil "a 1.2.3.4:1234 z")) 136 | (artefacts (extract-artefacts string 'ip-service))) 137 | (is (= 1 (length artefacts))) 138 | (= 1234 (ip-service-port (first artefacts))))) 139 | 140 | (test extractors.embedded-binary.bin.1 () 141 | (let* ((blob #(#xDE #xAD #xBE #xEF #xFE #xED #xFA #xCE #xD0 #x0D)) 142 | (string (format nil "xxx ~{~8,'0B~} xxx" (coerce blob 'list))) 143 | (input (map '(vector (unsigned-byte 8) *) #'char-code string)) 144 | (artefacts (extract-artefacts input 'binary-blob))) 145 | (is (= 1 (length artefacts))) 146 | (is (equalp blob 147 | (fragment-body (embedded-binary-bytes (first artefacts))))))) 148 | 149 | (test extractors.embedded-binary.base64.1 () 150 | (let* ((bytes #(#xDE #xAD #xBE #xEF #xFE #xED #xFA #xCE #xD0 #x0D 151 | #xDE #xAD #xBE #xEF #xFE #xED #xFA #xCE #xD0 #x0D 152 | #xDE #xAD #xBE #xEF #xFE #xED #xFA #xCE #xD0 #x0D 153 | #xDE #xAD #xBE #xEF #xFE #xED #xFA #xCE #xD0 #x0D 154 | #xDE #xAD #xBE #xEF #xFE #xED #xFA #xCE #xD0 #x0D 155 | #xDE #xAD #xBE #xEF #xFE #xED #xFA #xCE #xD0 #x0D)) 156 | (blob (make-array (length bytes) 157 | :element-type '(unsigned-byte 8) 158 | :initial-contents bytes)) 159 | (string 160 | (format nil "### Keybase proof xxx ~A xxx" 161 | (base64:usb8-array-to-base64-string blob))) 162 | (input (map '(vector (unsigned-byte 8) *) #'char-code string)) 163 | (artefacts (extract-artefacts input 'base64-blob))) 164 | (is (= 1 (length artefacts))) 165 | (let ((artefact (first artefacts))) 166 | (is (equalp blob 167 | (fragment-body (embedded-binary-bytes artefact))))))) 168 | 169 | (test extractors.embedded-binary.hex.1 () 170 | (let* ((blob #(#xDE #xAD #xBE #xEF #xFE #xED #xFA #xCE #xD0 #x0D 171 | #xDE #xAD #xBE #xEF #xFE #xED #xFA #xCE #xD0 #x0D 172 | #xDE #xAD #xBE #xEF #xFE #xED #xFA #xCE #xD0 #x0D 173 | #xDE #xAD #xBE #xEF #xFE #xED #xFA #xCE #xD0 #x0D)) 174 | (string (format nil "xxx ~{~2,'0X~} xxx" (coerce blob 'list))) 175 | (input (map '(vector (unsigned-byte 8) *) #'char-code string)) 176 | (artefacts (extract-artefacts input 'hex-blob))) 177 | (is (= 1 (length artefacts))) 178 | (is (equalp blob 179 | (fragment-body (embedded-binary-bytes (first artefacts))))))) 180 | 181 | (test extractors.embedded-binary.base64.2 () 182 | (let* ((string "Her b tresur: cHlyaXRlQGV4YW1wbGUuY29tOnlhcnJoYXJyeWFycmhhcnJ5YXJyaGFycnlhcnJoYXJyeWFycmhhcnJ5YXJyaGFycnlhcnJoYXJy") 183 | (bytes (map '(vector (unsigned-byte 8)) #'char-code string)) 184 | (artefacts (extract-artefacts bytes))) 185 | (dolist (artefact artefacts) 186 | (typecase artefact 187 | (credential 188 | (is (string= "pyrite@example.com" (credential-username artefact))) 189 | (is (string= "yarrharryarrharryarrharryarrharryarrharryarrharryarrharr" 190 | (credential-passphrase artefact)))) 191 | (domain 192 | (is (string= "example.com" (artefact-description artefact)))))))) 193 | 194 | (test extractors.embedded-binary.0x.1 () 195 | (let* ((string " 196 | [Byte[]] $Shellcode32 = @(0xfc,0xe8,0x89,0x00,0x00,0x00,0x60,0x89,0xe5, 197 | 0x80,0xfb,0xe0,0x75,0x05,0xbb,0x47,0x13,0x72, 198 | 0x61,0x6c,0x63,0x00)") 199 | (artefacts (extract-artefacts string))) 200 | (is (= 1 (length artefacts))) 201 | (is (= 22 (length (fragment-body (embedded-binary-bytes (first artefacts)))))))) 202 | 203 | (test extractors.embedded-binary.0x.2 () 204 | (let* ((string " 205 | [Byte[]] $Shellcode32 = @(0xfc, 0xe8, 0x89, 0x00, 0x00, 0x00, 0x60, 0x89, 0xe5, 206 | 0x80, 0xfb, 0xe0, 0x75, 0x05, 0xbb, 0x47, 0x13, 0x72, 207 | 0x61, 0x6c, 0x63, 0x00)") 208 | (artefacts (extract-artefacts string))) 209 | (is (= 1 (length artefacts))) 210 | (is (= 22 (length (fragment-body (embedded-binary-bytes (first artefacts)))))))) 211 | 212 | (test extractors.uri.1 () 213 | (let* ((string "xxx> http://example.local\\r\\n") 214 | (artefacts (extract-artefacts string 'uri))) 215 | (is (= 1 (length artefacts))) 216 | (is (string= "http://example.local" 217 | (artefact-source (first artefacts)))))) 218 | 219 | (test extractors.uri.2 () 220 | (let* ((string " http://user:pass@test.local:88/foo/bar/get?xxx=zzz#abc ") 221 | (artefacts (extract-artefacts string))) 222 | (is (and (<= 2 (length artefacts)) 223 | (some (lambda (artefact) (typep artefact 'uri)) artefacts) 224 | (some (lambda (artefact) (typep artefact 'credential)) artefacts))) 225 | (dolist (artefact artefacts) 226 | (typecase artefact 227 | (credential 228 | (is (or (and (string= "user" (credential-username artefact)) 229 | (string= "pass" (credential-passphrase artefact))) 230 | ;; XXX: Expected failure. 231 | (string= "pass@test.local" 232 | (credential-username artefact))))) 233 | (uri 234 | (is (string= "http://user:pass@test.local:88/foo/bar/get?xxx=zzz#abc" 235 | (artefact-source artefact)))))))) 236 | 237 | (test extractors.uri.3 () 238 | (let* ((string "") 239 | (artefacts (extract-artefacts string 'uri))) 240 | (is (endp artefacts)))) 241 | 242 | (test extractors.uri.4 () 243 | (let* ((string "a https://2.xx.example.local/w/e.png z") 244 | (artefacts (extract-artefacts string 'uri))) 245 | (is (= 1 (length artefacts))) 246 | (is (string= "https://2.xx.example.local/w/e.png" 247 | (artefact-source (first artefacts)))))) 248 | 249 | (test extractors.uri.5 () 250 | (let* ((string "a hxxps://example.local/xyz z") 251 | (artefacts (extract-artefacts string 'uri))) 252 | (is (= 1 (length artefacts))))) 253 | 254 | (test extractors.uri.numeric-host.ipv4.1 () 255 | (let* ((string "#EXTM3U 256 | http://91.109.113.38:8000/playlist.m3u8 257 | #EXTINF:0,http://220.125.160.36:9981/playlist") 258 | (uris (extract-artefacts string 'uri))) 259 | (is (= 2 (length uris))) 260 | (dolist (artefact uris) 261 | (let ((uri (artefact-source artefact))) 262 | (is (or (string= "http://91.109.113.38:8000/playlist.m3u8" uri) 263 | (string= "http://220.125.160.36:9981/playlist" uri))))))) 264 | 265 | (test extractors.uri.numeric-host.ipv4.2 () 266 | (let* ((string "aaa http://2130706433/APH.hta zzz") 267 | (artefacts (extract-artefacts string 'uri))) 268 | (is (= 1 (length artefacts))) 269 | (is (string= "http://2130706433/APH.hta" 270 | (artefact-source (first artefacts)))))) 271 | 272 | (test extractors.uri.numeric-host.ipv4.3 () 273 | (let* ((string "aaa http://127.1/APH.hta zzz") 274 | (artefacts (extract-artefacts string 'uri))) 275 | (is (= 1 (length artefacts))) 276 | (is (string= "http://127.1/APH.hta" 277 | (artefact-source (first artefacts)))))) 278 | 279 | (test extractors.uri.numeric-host.ipv6.1 () 280 | ;; https://tools.ietf.org/html/rfc3513#section-2.2 281 | (let ((addresses '("FEDC:BA98:7654:3210:FEDC:BA98:7654:3210" 282 | "1080:0:0:0:8:800:200C:417A" 283 | "FF01:0:0:0:0:0:0:101" 284 | "0:0:0:0:0:0:0:1" 285 | "1080::8:800:200C:417A" 286 | "FF01::101" 287 | "::1" 288 | "0:0:0:0:0:0:13.1.68.3" 289 | "0:0:0:0:0:FFFF:129.144.52.38" 290 | "::13.1.68.3" 291 | "::FFFF:129.144.52.38"))) 292 | (loop for addr in addresses 293 | for string = (format nil "a http://[~A]/xxx z" addr) 294 | do (let ((artefacts (extract-artefacts string 'uri))) 295 | (is (= 1 (length artefacts)) 296 | "Failed to extract URI from ~S" string))))) 297 | 298 | (test extractors.uri.onion () 299 | (let ((artefacts (extract-artefacts "xxx http://3g2upl4pq6kufc4m.onion/ zzz" 300 | 'uri))) 301 | (is (= 1 (length artefacts))) 302 | (is (string= "http://3g2upl4pq6kufc4m.onion/" 303 | (artefact-source (first artefacts)))))) 304 | 305 | (test extractors.domains.onion () 306 | (let ((artefacts (extract-artefacts "xxx 3g2upl4pq6kufc4m.onion zzz" 'onion))) 307 | (is (= 1 (length artefacts))) 308 | (is (string= "3g2upl4pq6kufc4m.onion" 309 | (artefact-source (first artefacts)))))) 310 | 311 | (test extractors.nested.1 () 312 | (let* ((string (format nil "~@{~A~^~%~}" 313 | "H4sICAQnv1wAA2Zha2UtcGFzdGUA8zrS" 314 | "mJdZrOBydF9qUebRtqMLrRSyEoEieilV" 315 | "QIG8Yofk1KISvZwyLgBMgLO1KQAAAA==")) 316 | (emails (extract-artefacts string 'email))) 317 | (is (= 1 (length emails))) 318 | (is (string= "janis.dzerins@cert.lv" 319 | (artefact-source (first emails)))))) 320 | 321 | (test extractors.nested.from-paste () 322 | (let* ((string (format nil "~@{~A~^~%~}" 323 | "H4sICAQnv1wAA2Zha2UtcGFzdGUA8zrS" 324 | "mJdZrOBydF9qUebRtqMLrRSyEoEieilV" 325 | "QIG8Yofk1KISvZwyLgBMgLO1KQAAAA==")) 326 | (bytes (map '(vector (unsigned-byte 8)) #'char-code string)) 327 | (content (make-instance 'content :id 0 :body bytes)) 328 | (paste (make-instance 'circl-paste 329 | :id 0 330 | :provider "test" 331 | :provider-id "from-paste" 332 | :content content)) 333 | (emails (extract-artefacts paste 'email))) 334 | (is (= 1 (length emails))) 335 | (is (string= "janis.dzerins@cert.lv" 336 | (artefact-source (first emails))))) ) 337 | 338 | (test extractors.broken-utf-8 () 339 | (let* ((codes 340 | #(239 191 189 239 191 189 239 191 189 239 191 189 239 191 189 239 341 | 191 189 239 191 189 239 191 189 239 191 189 239 191 189 239 191 342 | 189 239 191 189 239 191 189 32 239 191 189 239 191 189 239 191 343 | 189 239 191 189 239 191 189 239 191 189 239 191 189 239 191 189 344 | 239 191 189 239 191 189 239 191 189 239 191 189 239 191 189 239 345 | 191 189 32 239 191 189 239 191 189 239 191 189 239 191 189 58)) 346 | (bytes (coerce codes '(vector (unsigned-byte 8)))) 347 | (content (make-instance 'content :id 0 :body bytes)) 348 | (paste (make-instance 'circl-paste 349 | :id 0 350 | :provider "test" 351 | :provider-id "from-paste" 352 | :content content))) 353 | (signals broken-utf-8 (extract-artefacts paste)))) 354 | 355 | (suite 'domains) 356 | 357 | (defmacro is-viable-domain-p ((string &optional (start 0) end) 358 | exp-value exp-reason) 359 | (check-type string string) 360 | (let ((end (or end (length string)))) 361 | `(multiple-value-bind (.value. .reason.) 362 | (pastelyzer::viable-domain-p ,string ,start ,end) 363 | (is (and (eq ,exp-value .value.) 364 | (eq ,exp-reason .reason.)) 365 | "Expected '~A' in '~A' to be (~S ~S), got (~S ~S)" 366 | (subseq ,string ,start ,end) ,string 367 | ,exp-value ,exp-reason 368 | .value. .reason.)))) 369 | 370 | (test viable-domains.fall-through () 371 | (is-viable-domain-p ("www.xxx.com") t :undecided) 372 | (is-viable-domain-p ("123.xxx.com") t :undecided) 373 | (is-viable-domain-p ("www.xxx.xn--123xx") t :undecided)) 374 | 375 | (test viable-domains.tld () 376 | (is-viable-domain-p ("www.xxx.c0m") nil :invalid-tld) 377 | (is-viable-domain-p ("www.xxx.c-m") nil :invalid-tld)) 378 | 379 | (test viable-domains.labels () 380 | (is-viable-domain-p ("www.123.com") nil :invalid-labels) 381 | 382 | (is-viable-domain-p ("www.xxx.-xx") nil :invalid-labels) 383 | (is-viable-domain-p ("www.xxx.xx-") nil :invalid-labels) 384 | (is-viable-domain-p ("www.-xx.com") nil :invalid-labels) 385 | (is-viable-domain-p ("www.xx-.com") nil :invalid-labels) 386 | (is-viable-domain-p ("-ww.xxx.com") nil :invalid-labels) 387 | (is-viable-domain-p ("ww-.xxx.com") nil :invalid-labels) 388 | 389 | (is-viable-domain-p ("www.xxx.c_m") nil :invalid-labels) 390 | (is-viable-domain-p ("www.x_x.com") nil :invalid-labels) 391 | (is-viable-domain-p ("w_w.xxx.com") nil :invalid-labels)) 392 | 393 | (test viable-domains.url-like () 394 | (is-viable-domain-p ("http://foo.xxx.com x" 7 18) t :url-like) 395 | (is-viable-domain-p ("http://foo.xxx.com/ x" 7 18) t :url-like) 396 | (is-viable-domain-p ("http://foo.xxx.com:80 x" 7 18) t :url-like) 397 | (is-viable-domain-p ("http://foo.xxx.java x" 7 19) t :url-like)) 398 | 399 | (test viable-domains.self () 400 | (is-viable-domain-p ("self.whatever") nil :self) 401 | (is-viable-domain-p ("self.whatever=" 0 13) nil :self) 402 | (is-viable-domain-p ("self.whatever = " 0 13) nil :self)) 403 | 404 | (test viable-domains.file () 405 | (is-viable-domain-p ("Third.png") nil :file-name)) 406 | 407 | (test viable-domains.path () 408 | (is-viable-domain-p ("/xxx.com" 1) nil :path) 409 | (is-viable-domain-p ("\\xxx.com" 1) nil :path)) 410 | 411 | (test viable-domains.function-call () 412 | (is-viable-domain-p ("xxx.whatever()" 0 12) nil :function-call)) 413 | 414 | (test viable-domains.assignment () 415 | (is-viable-domain-p ("xxx.whatever=" 0 12) nil :expression) 416 | (is-viable-domain-p ("xxx.whatever = " 0 12) nil :expression)) 417 | 418 | (test domains.1 () 419 | (let ((string "tw(rw,cfn(.3,.7,0)*ang(-math.pi/2,math.rad(-40),0),.1,'')")) 420 | (is (endp (extract-artefacts string))))) 421 | -------------------------------------------------------------------------------- /tests/suites.lisp: -------------------------------------------------------------------------------- 1 | (in-package #:pastelyzer.tests) 2 | 3 | ;;; This will not work nicely if any other loaded system uses T as the 4 | ;;; suite identifier -- if more than one such test suite is loaded in 5 | ;;; the same image the latest loaded suite will override all 6 | ;;; previously loaded ones. 7 | ;;; 8 | ;;; An alternative to this would be to use a name like 'all, and 9 | ;;; instead of: 10 | ;;; 11 | ;;; (symbol-call '#:2am '#:run 't) 12 | ;;; 13 | ;;; in the system definition we'd have to do something like: 14 | ;;; 15 | ;;; (symbol-call '#:2am '#:run (find-symbol* '#:all '#:pastelyzer.test)) 16 | ;;; 17 | ;;; which does not look nice and feels like too much unnecessary code 18 | ;;; in a system definition. 19 | ;;; 20 | ;;; Also from within another system (e.g., pastelyzer) doing 21 | ;;; 22 | ;;; (2am:run 't) 23 | ;;; 24 | ;;; is much more convenient to do than 25 | ;;; 26 | ;;; (2am:run 'pastelyzer.test:all) 27 | ;;; 28 | (suite 'config '(pastelyzer.tests.config.filter:tests 29 | pastelyzer.tests.config.sets:tests)) 30 | 31 | (suite 't '(addresses processing util circl-paste config)) 32 | -------------------------------------------------------------------------------- /tests/util.lisp: -------------------------------------------------------------------------------- 1 | (in-package #:pastelyzer.tests) 2 | 3 | (suite 'util) 4 | 5 | (test util.alphabet.empty () 6 | (is (equal "" (pastelyzer::alphabet ""))) 7 | (is (equal "0" (pastelyzer::alphabet "0"))) 8 | (is (equal "0" (pastelyzer::alphabet "00"))) 9 | (is (equal "01" (pastelyzer::alphabet "01"))) 10 | (is (equal "01" (pastelyzer::alphabet "10"))) 11 | (is (equal "01" (pastelyzer::alphabet "101"))) 12 | (is (equal "01" (pastelyzer::alphabet "010"))) ) 13 | 14 | (test util.sub-alphabet () 15 | (is (pastelyzer::sub-alphabet-p "" "")) 16 | (is (pastelyzer::sub-alphabet-p "" "ab")) 17 | (is (pastelyzer::sub-alphabet-p "a" "ab")) 18 | (is (pastelyzer::sub-alphabet-p "b" "ab")) 19 | (is (not (pastelyzer::sub-alphabet-p "c" "ab")))) 20 | 21 | (test util.string-context () 22 | (let ((string (format nil "aaa~C~Cwhatever NEEDLE blah blah ~C~C~Czzz" 23 | ;; ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ 24 | ;; 0 3 4 5 14 20 25 31 33 34 25 | #\linefeed #\tab #\tab #\return #\linefeed))) 26 | (macrolet ((check (expected result) 27 | `(let ((.expected. ,expected) 28 | (.result. ,result)) 29 | (is (string= .expected. .result.) 30 | "Expected ~S, got ~S" .expected. .result.)))) 31 | (check 32 | "" 33 | (pastelyzer::string-context-before string 0)) 34 | (check 35 | "aaa..whatever " 36 | (pastelyzer::string-context-before string 14 :bol nil)) 37 | (check 38 | "a..whatever " 39 | (pastelyzer::string-context-before string 14 :after 2 :trim-space t)) 40 | (check 41 | "whatever " 42 | (pastelyzer::string-context-before string 14 :after 3 :trim-space t)) 43 | (check 44 | ".whatever " 45 | (pastelyzer::string-context-before string 14 :bol t :trim-space nil)) 46 | (check 47 | "whatever " 48 | (pastelyzer::string-context-before string 14 :bol t :trim-space t)) 49 | (check 50 | "ever " 51 | (pastelyzer::string-context-before string 14 :limit 5)) 52 | (check 53 | "" 54 | (pastelyzer::string-context-after string (length string))) 55 | (check 56 | " blah blah ...zzz" 57 | (pastelyzer::string-context-after string 20 :eol nil)) 58 | (check 59 | " blah blah ...z" 60 | (pastelyzer::string-context-after string 20 :before 35 :trim-space t)) 61 | (check 62 | " blah blah" 63 | (pastelyzer::string-context-after string 20 :before 34 :trim-space t)) 64 | (check 65 | ;; XXX: Do we want eol to be until #\return? 66 | " blah blah .." 67 | (pastelyzer::string-context-after string 20 :eol t :trim-space nil)) 68 | (check 69 | " blah blah" 70 | (pastelyzer::string-context-after string 20 :eol t :trim-space t)) 71 | (check 72 | " blah" 73 | (pastelyzer::string-context-after string 20 :limit 5))))) 74 | 75 | (test util.summarize-numbers () 76 | (macrolet ((check (input expected) 77 | `(let ((.input. ,input) 78 | (.expected. ,expected) 79 | (.result. (util:summarize-numbers ,input))) 80 | (is (string= .expected. .result.) 81 | "~S should summarize to ~S, not ~S" 82 | .input. .expected. .result.)))) 83 | (check '() "") 84 | (check '(1) "1") 85 | (check '(1 1) "1") 86 | (check '(1 2) "1-2") 87 | (check '(1 2 3) "1-3") 88 | (check '(1 4) "1, 4") 89 | (check '(1 2 4) "1-2, 4") 90 | (check '(1 3 4) "1, 3-4") 91 | (check '(1 2 3 4) "1-4"))) 92 | --------------------------------------------------------------------------------