├── resources ├── mongos.conf └── mongod.conf ├── doc └── intro.md ├── .gitignore ├── project.clj ├── CHANGELOG.md ├── test └── jepsen │ └── mongodb │ └── list_append_test.clj ├── src └── jepsen │ ├── mongodb │ ├── db │ │ ├── proxy.clj │ │ └── local_proxy.clj │ ├── nemesis.clj │ ├── list_append_multi_node.clj │ ├── list_append.clj │ ├── db.clj │ └── client.clj │ └── mongodb.clj ├── README.md └── LICENSE /resources/mongos.conf: -------------------------------------------------------------------------------- 1 | sharding: 2 | configDB: %CONFIG_DB% 3 | 4 | net: 5 | bindIp: %IP% 6 | -------------------------------------------------------------------------------- /doc/intro.md: -------------------------------------------------------------------------------- 1 | # Introduction to jepsen.mongodb 2 | 3 | TODO: write [great documentation](http://jacobian.org/writing/what-to-write/) 4 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | /classes 3 | /checkouts 4 | /store 5 | profiles.clj 6 | pom.xml 7 | pom.xml.asc 8 | *.jar 9 | *.swp 10 | *.class 11 | /.lein-* 12 | /.nrepl-port 13 | .hgignore 14 | .hg/ 15 | -------------------------------------------------------------------------------- /project.clj: -------------------------------------------------------------------------------- 1 | (defproject jepsen.mongodb "0.3.2-SNAPSHOT" 2 | :description "Jepsen MongoDB tests" 3 | :url "http://github.com/jepsen-io/mongodb" 4 | :license {:name "EPL-2.0 OR GPL-2.0-or-later WITH Classpath-exception-2.0" 5 | :url "https://www.eclipse.org/legal/epl-2.0/"} 6 | :dependencies [[org.clojure/clojure "1.11.1"] 7 | [clj-wallhack "1.0.1"] 8 | [jepsen "0.3.3-SNAPSHOT"] 9 | [org.mongodb/mongodb-driver-sync "4.6.0"]] 10 | :main jepsen.mongodb 11 | :jvm-opts ["-Djava.awt.headless=true"] 12 | :repl-options {:init-ns jepsen.mongodb}) 13 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Change Log 2 | All notable changes to this project will be documented in this file. This change log follows the conventions of [keepachangelog.com](http://keepachangelog.com/). 3 | 4 | ## [Unreleased] 5 | ### Changed 6 | - Add a new arity to `make-widget-async` to provide a different widget shape. 7 | 8 | ## [0.1.1] - 2020-05-03 9 | ### Changed 10 | - Documentation on how to make the widgets. 11 | 12 | ### Removed 13 | - `make-widget-sync` - we're all async, all the time. 14 | 15 | ### Fixed 16 | - Fixed widget maker to keep working when daylight savings switches over. 17 | 18 | ## 0.1.0 - 2020-05-03 19 | ### Added 20 | - Files from the new template. 21 | - Widget maker public API - `make-widget-sync`. 22 | 23 | [Unreleased]: https://github.com/your-name/jepsen.mongodb/compare/0.1.1...HEAD 24 | [0.1.1]: https://github.com/your-name/jepsen.mongodb/compare/0.1.0...0.1.1 25 | -------------------------------------------------------------------------------- /resources/mongod.conf: -------------------------------------------------------------------------------- 1 | # mongod.conf 2 | 3 | # for documentation of all options, see: 4 | # http://docs.mongodb.org/manual/reference/configuration-options/ 5 | 6 | # Where and how to store data. 7 | storage: 8 | dbPath: /var/lib/mongodb 9 | journal: 10 | enabled: true 11 | # engine: 12 | # mmapv1: 13 | # wiredTiger: 14 | 15 | # where to write logging data. 16 | systemLog: 17 | destination: file 18 | logAppend: true 19 | path: /var/log/mongodb/mongod.log 20 | 21 | # network interfaces 22 | net: 23 | # port: 27017 24 | bindIp: %IP% 25 | 26 | # how the process runs 27 | processManagement: 28 | timeZoneInfo: /usr/share/zoneinfo 29 | 30 | #security: 31 | 32 | #operationProfiling: 33 | 34 | replication: 35 | replSetName: %REPL_SET_NAME% 36 | 37 | sharding: 38 | clusterRole: %CLUSTER_ROLE% 39 | 40 | ## Enterprise-Only Options: 41 | 42 | #auditLog: 43 | 44 | #snmp: 45 | 46 | -------------------------------------------------------------------------------- /test/jepsen/mongodb/list_append_test.clj: -------------------------------------------------------------------------------- 1 | (ns jepsen.mongodb.list-append-test 2 | (:require [clojure [pprint :refer [pprint]] 3 | [test :refer :all]] 4 | [jepsen [checker :as checker] 5 | [util :as util]] 6 | [jepsen.mongodb.list-append :as la])) 7 | 8 | (deftest stale-read-test 9 | ; Making sure that we can catch stale reads with our checker 10 | (let [ax1 {:index 0, :time 0, :process 0, :type :invoke, :f :txn, :value [[:append :x 1]]} 11 | ax1' {:index 1, :time 1, :process 0, :type :ok, :f :txn, :value [[:append :x 1]]} 12 | rx {:index 2, :time 2, :process 1, :type :invoke, :f :txn, :value [[:r :x nil]]} 13 | rx' {:index 3, :time 3, :process 1, :type :ok, :f :txn, :value [[:r :x nil]]} 14 | history [ax1 ax1' rx rx'] 15 | test (-> {:name "stale read test" 16 | :start-time (util/local-time) 17 | :history history} 18 | (merge (la/workload {}))) 19 | results (checker/check (:checker test) test history {})] 20 | ;(pprint results) 21 | (is (= #{:strong-snapshot-isolation} (->> results :elle :not))) 22 | (is (= #{:G-single-realtime} (->> results :elle :anomalies keys set))))) 23 | -------------------------------------------------------------------------------- /src/jepsen/mongodb/db/proxy.clj: -------------------------------------------------------------------------------- 1 | (ns jepsen.mongodb.db.proxy 2 | "Sets up a proxy binary on each DB node." 3 | (:require [clojure [pprint :refer [pprint]] 4 | [string :as str]] 5 | [clojure.java.io :as io] 6 | [clojure.tools.logging :refer [info warn]] 7 | [dom-top.core :refer [real-pmap]] 8 | [jepsen [control :as c] 9 | [core :as jepsen] 10 | [db :as db] 11 | [util :as util :refer [meh random-nonempty-subset sh]]] 12 | [jepsen.control [net :as cn] 13 | [util :as cu]] 14 | [jepsen.os.debian :as debian] 15 | [jepsen.mongodb [client :as client :refer [Conn 16 | host 17 | port]]] 18 | [slingshot.slingshot :refer [try+ throw+]])) 19 | 20 | (def dir 21 | "Where do we install our proxy?" 22 | "/opt/jepsen/mongo-proxy") 23 | 24 | (def log-file (str dir "/log")) 25 | (def pid-file (str dir "/pid")) 26 | (def bin (str dir "/mongo-proxy")) 27 | 28 | (def listener-count 29 | "How many proxy listener ports do we bind?" 30 | 3) 31 | 32 | (defn start! 33 | "Starts the proxy for a test." 34 | [test db node] 35 | (c/su 36 | (cu/start-daemon! 37 | {:logfile log-file 38 | :pidfile pid-file 39 | :chdir dir} 40 | bin 41 | :-mongo-upstream (str (cn/ip node) ":" (port db test)) 42 | :-bind-address (cn/ip node) 43 | :-listener-count listener-count))) 44 | 45 | (defrecord ProxyDB [db] 46 | Conn 47 | (host [_ test node] 48 | (host db test node)) 49 | 50 | (port [_ test] 51 | (+ client/proxy-port (rand-int listener-count))) 52 | 53 | db/DB 54 | (setup! [this test node] 55 | (db/setup! db test node) 56 | (when-let [proxy (:proxy test)] 57 | (c/su 58 | (c/exec :mkdir :-p dir) 59 | (c/upload proxy bin) 60 | (c/exec :chmod :+x bin) 61 | (start! test db node)))) 62 | 63 | (teardown! [this test node] 64 | (when (:proxy test) 65 | (c/su (cu/stop-daemon! bin pid-file)) 66 | (c/exec :rm :-rf pid-file log-file)) 67 | (db/teardown! db test node)) 68 | 69 | db/Primary 70 | (setup-primary! [_ test node] 71 | (db/setup-primary! db test node)) 72 | 73 | (primaries [_ test] 74 | (db/primaries db test)) 75 | 76 | db/LogFiles 77 | (log-files [_ test node] 78 | (merge (db/log-files db test node) 79 | (when (:proxy test) 80 | {log-file "proxy.log"}))) 81 | 82 | ; We don't bother injecting faults into the proxy itself 83 | db/Process 84 | (start! [_ test node] 85 | (db/start! db test node) 86 | ) 87 | 88 | (kill! [_ test node] 89 | (db/kill! db test node)) 90 | 91 | db/Pause 92 | (pause! [_ test node] 93 | (db/pause! db test node)) 94 | 95 | (resume! [this test node] 96 | (db/resume! db test node))) 97 | 98 | (defn db 99 | "Wraps another database, uploading a local proxy binary (:proxy test) 100 | to each node, and running it there." 101 | [db] 102 | (ProxyDB. db)) 103 | -------------------------------------------------------------------------------- /src/jepsen/mongodb/nemesis.clj: -------------------------------------------------------------------------------- 1 | (ns jepsen.mongodb.nemesis 2 | "Nemeses for MongoDB" 3 | (:require [clojure.pprint :refer [pprint]] 4 | [clojure.tools.logging :refer [info warn]] 5 | [dom-top.core :refer [real-pmap]] 6 | [jepsen [nemesis :as n] 7 | [net :as net] 8 | [util :as util]] 9 | [jepsen.generator :as gen] 10 | [jepsen.nemesis [combined :as nc] 11 | [time :as nt]] 12 | [jepsen.mongodb.db :as db])) 13 | 14 | (defn shard-generator 15 | "Takes a collection of shard packages, and returns a generator that emits ops 16 | like {:f whatever, :shard \"foo\", :value blah}, drawn from one of the shard 17 | packages." 18 | [packages] 19 | (->> packages 20 | (map (fn [pkg] 21 | (let [shard-name (:name pkg)] 22 | (gen/map (fn [op] (assoc op :shard shard-name)) 23 | (:generator pkg))))) 24 | gen/mix)) 25 | 26 | (defn shard-nemesis 27 | "Takes a collection of shard packages, and returns a nemesis that 28 | takes ops like {:f whatever, :shard \"foo\", :value blah}, and dispatches 29 | that op to the nemesis for that particular shard." 30 | [packages] 31 | (reify n/Nemesis 32 | (setup! [this test] 33 | (shard-nemesis 34 | (real-pmap (fn [pkg] 35 | (update pkg :nemesis 36 | n/setup! (db/test-for-shard test pkg))) 37 | packages))) 38 | 39 | (invoke! [this test op] 40 | (let [shard-name (:shard op) 41 | pkg (first (filter (comp #{shard-name} :name) packages)) 42 | nemesis (:nemesis pkg) 43 | test (db/test-for-shard test pkg) 44 | op' (n/invoke! (:nemesis pkg) test op)] 45 | op')) 46 | 47 | (teardown! [this test] 48 | (real-pmap (fn [pkg] 49 | (n/teardown! (:nemesis pkg) 50 | (db/test-for-shard test pkg))) 51 | packages)) 52 | 53 | n/Reflection 54 | (fs [this] 55 | (set (mapcat (comp n/fs :nemesis) packages))))) 56 | 57 | (defn package-for-shard 58 | "Builds a nemesis package for a specific shard, merged with the shard map 59 | itself." 60 | [opts shard] 61 | (merge shard 62 | (nc/nemesis-package (assoc opts :db (:db shard))))) 63 | 64 | (defn sharded-nemesis-package 65 | "Constructs a nemesis and generators for a sharded MongoDB." 66 | [opts] 67 | (let [; Construct a package for each shard 68 | pkgs (map (partial package-for-shard opts) (:shards (:db opts)))] 69 | 70 | ; Now, we need a generator and nemesis which mix operations on various 71 | ; shards, and route those operations to the nemesis for each appropriate 72 | ; shards. We merge these onto a nemesis package for the whole test--that 73 | ; gives us 74 | (assoc (nc/nemesis-package opts) 75 | :generator (shard-generator pkgs) 76 | :final-generator nil 77 | :nemesis (shard-nemesis pkgs))) 78 | ; Or just do a standard package 79 | ; TODO: mix these 80 | ;(nc/nemesis-package) 81 | ) 82 | 83 | (defn nemesis-package 84 | "Constructs a nemesis and generators for MongoDB based on CLI options." 85 | [opts] 86 | (let [opts' (-> opts 87 | (assoc :interval 0) ; Fixed intervals 88 | (update :faults set))] 89 | (-> (if (:sharded opts') 90 | (sharded-nemesis-package opts') 91 | (nc/nemesis-package opts')) 92 | (update :generator (partial gen/delay (:interval opts)))))) 93 | -------------------------------------------------------------------------------- /src/jepsen/mongodb/list_append_multi_node.clj: -------------------------------------------------------------------------------- 1 | (ns jepsen.mongodb.list-append-multi-node 2 | "A variant of the list-append workload which performs transactions across 3 | *different* nodes, using the same session ID and transaction number across 4 | all." 5 | (:require [clojure [pprint :refer [pprint]]] 6 | [clojure.tools.logging :refer [info warn]] 7 | [dom-top.core :refer [loopr 8 | with-retry]] 9 | [elle.list-append :as elle.list-append] 10 | [jepsen [client :as client] 11 | [checker :as checker] 12 | [util :as util :refer [timeout 13 | map-vals]]] 14 | [jepsen.mongodb [client :as c] 15 | [list-append :as list-append]] 16 | [slingshot.slingshot :as slingshot]) 17 | (:import (java.util.concurrent TimeUnit) 18 | (com.mongodb MongoCommandException 19 | MongoNodeIsRecoveringException 20 | MongoNotPrimaryException 21 | TransactionOptions 22 | ReadConcern 23 | ReadPreference 24 | WriteConcern) 25 | (com.mongodb.client.model Filters 26 | UpdateOptions))) 27 | 28 | (defn txn! 29 | "Actually execute a transaction across multiple connections." 30 | [test primary-conn conns txn] 31 | (with-open [primary-session (c/start-session primary-conn)] 32 | (let [opts (list-append/txn-options test txn)] 33 | ; Start txn 34 | (c/start-txn! primary-session opts) 35 | ; Apply each micro-op to a different cloned session 36 | (let [txn' (mapv (fn apply-mop! [mop] 37 | (let [conn (rand-nth conns) 38 | db (c/db conn list-append/db-name test)] 39 | (with-open [session (c/start-session conn)] 40 | (c/with-session-like [session primary-session] 41 | (list-append/apply-mop! test db session mop))))) 42 | txn)] 43 | ; And commit 44 | (with-open [session (c/start-session (rand-nth conns))] 45 | (c/with-session-like [session primary-session] 46 | (c/notify-message-sent! session) 47 | (c/commit-txn! session))))))) 48 | 49 | (defrecord Client [conns] 50 | client/Client 51 | (open! [this test node] 52 | (assoc this :conns (->> ;(:nodes test) 53 | ; We set up n connections to the *same* node. This 54 | ; means we don't get to see what happens when we 55 | ; split a transaction across old and new primaries, 56 | ; but if we don't do this almost every txn is 57 | ; doomed to failure because ONLY a primary can do 58 | ; transaction stuff, and the chances that every op 59 | ; rolls the dice correctly and hits a primary are 60 | ; not good. 61 | (repeat 3 node) 62 | (mapv (fn [node] 63 | (c/open node test)))))) 64 | 65 | (setup! [this test] 66 | (list-append/create-coll! test (first conns))) 67 | 68 | (invoke! [this test op] 69 | (let [txn (:value op)] 70 | (c/with-errors op 71 | (timeout 5000 (assoc op :type :info, :error :timeout) 72 | (let [conn (rand-nth conns) 73 | txn' (if (and (<= (count txn) 1) 74 | (not (:singleton-txns test))) 75 | ; We can run without a txn 76 | (let [db (c/db conn list-append/db-name test)] 77 | [(list-append/apply-mop! test db nil (first txn))]) 78 | 79 | (txn! test conn conns txn))] 80 | (assoc op :type :ok, :value txn')))))) 81 | 82 | (teardown! [this test]) 83 | 84 | (close! [this test] 85 | (mapv c/close! conns))) 86 | 87 | (defn workload 88 | "Same options as for list-append/workload, but uses our custom client." 89 | [opts] 90 | (-> (list-append/workload opts) 91 | (assoc :client (Client. nil)))) 92 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # jepsen.mongodb 2 | 3 | Tests for MongoDB, running on Debian Buster. You want 9 nodes for multi-shard 4 | tests: 3 for the config replica set, and 3 for each of 2 shards. 5 | 6 | ## Usage 7 | 8 | ``` 9 | lein run test-all -w list-append --nodes-file ~/nodes -r 1000 --concurrency 3n --time-limit 120 --max-writes-per-key 128 --read-concern majority --write-concern majority --txn-read-concern snapshot --txn-write-concern majority --nemesis-interval 1 --nemesis partition --test-count 30 10 | ``` 11 | 12 | ## Workloads 13 | 14 | `list-append` performs a mix of non-transactional and transactional appends and reads to documents by primary key. 15 | `list-append-multi-node` tries to do the same, but splitting requests across multiple nodes. This does not work yet, and may never. 16 | 17 | ## Nemeses 18 | 19 | `partition`, `kill`, and `pause` create/resolve network partitions, 20 | kill/restart processes with `kill -9`, and pause processes with 21 | `SIGSTOP/SIGCONT`. `clock` adjusts the clock by anywhere from a few millis to 22 | hundreds of seconds, and sometimes strobes the clock rapidly back and forth. 23 | `member` adds and removes nodes dynamically. 24 | 25 | ## Options 26 | 27 | See `lein run test --help` for all options. 28 | 29 | `--hidden NUM` allows you to designate the first `NUM` nodes in each replica 30 | set as hidden replicas. 31 | 32 | `--[no-]journal` allows you to force write concern's `journal` flag on or off; otherwise it's left at the client default. 33 | 34 | `--lazyfs` mounts Mongo's data directory on the lazyfs filesystem, which means 35 | that process kills not only kill processes, but *also* lose un-fsynced writes. 36 | Helpful for simulating power failures. 37 | 38 | `--local-proxy PATH/TO/LOCAL/BIN` runs a custom binary on the local control 39 | node, and routes client requests to it rather than remote nodes directly. 40 | `--proxy` does the same, but uploads and runs the proxy on each DB node. This 41 | feature was developed for a Jepsen client whose proxy is not public, but it 42 | ought to work with any proxy which takes the same arguments. See 43 | `jepsen.mongodb.db.local-proxy/start!` for details. 44 | 45 | `--max-txn-length` and `max-writes-per-key` govern the maximum size of 46 | transactions (e.g. for the list-append workload) and the number of writes to 47 | any single key before choosing a new key. 48 | 49 | `--nemesis FAULTS` takes a comma-separated list of faults to inject, and 50 | `--nemesis-interval SECONDS` controls roughly how long between nemesis 51 | operations, for each class of fault. 52 | 53 | These tests automatically set write concern even on read-only 54 | transactions--consistent with MongoDB's documentation. However, doing this is 55 | not intuitive and many guides to Mongo transactions omit it. Use 56 | `--no-read-only-write-concern` to do the obvious, wrong thing and *not* provide 57 | a write concern for read-only transactions. This causes snapshot isolation 58 | violations. 59 | 60 | `--rate HZ` controls the upper bound on how many operations per second Jepsen 61 | tries to perform. 62 | 63 | `--read-concern CONCERN` and `--txn-read-concern CONCERN` set the read concern 64 | for single operations and Mongo transactions, respectively. `--write-concern` 65 | and `--txn-write-concern` do the same for write concerns. If omitted, uses 66 | client defaults, which are totally unsafe. 67 | 68 | `--read-preference PREF` controls whether the client tries to read from 69 | primaries, secondaries, etc. 70 | 71 | `--[no-]retry-writes` allows you to explicitly choose whether or not to 72 | enable retryable writes at the client level. Note that Mongo ignores this 73 | setting for some transaction features. 74 | 75 | `--sharded`, if set, runs multiple replica sets, each with 3 replicas. The 76 | first three nodes form one replica set, used for config. The second 3 nodes 77 | form the first data shard, the third 3 nodes form the second data shard, and so 78 | on--you therefore want at least 9 nodes to run a sharded test. 79 | 80 | This test only performs Mongo transactions if the logical Jepsen transaction 81 | has multiple operations--for instance, a read and a write or three writes. 82 | Single reads and single writes are executed directly, without a Mongo 83 | transaction. Use `--singleton-txns` to force a Mongo transaction for *every* 84 | Jepsen transaction. 85 | 86 | `-v VERSION` controls which MongoDB version we install and test. 87 | 88 | `-w WORKLOAD` tells Jepsen which workload to run: e.g. `list-append`. 89 | 90 | ## License 91 | 92 | Copyright © 2020 Jepsen, LLC 93 | 94 | This program and the accompanying materials are made available under the 95 | terms of the Eclipse Public License 2.0 which is available at 96 | http://www.eclipse.org/legal/epl-2.0. 97 | 98 | This Source Code may also be made available under the following Secondary 99 | Licenses when the conditions for such availability set forth in the Eclipse 100 | Public License, v. 2.0 are satisfied: GNU General Public License as published by 101 | the Free Software Foundation, either version 2 of the License, or (at your 102 | option) any later version, with the GNU Classpath Exception which is available 103 | at https://www.gnu.org/software/classpath/license.html. 104 | -------------------------------------------------------------------------------- /src/jepsen/mongodb/db/local_proxy.clj: -------------------------------------------------------------------------------- 1 | (ns jepsen.mongodb.db.local-proxy 2 | "Sets up a local binary which proxies to remote MongoDB nodes." 3 | (:require [byte-streams :as bs] 4 | [clojure [pprint :refer [pprint]] 5 | [string :as str]] 6 | [clojure.java.io :as io] 7 | [clojure.tools.logging :refer [info warn]] 8 | [dom-top.core :refer [real-pmap]] 9 | [jepsen [control :as c] 10 | [core :as jepsen] 11 | [db :as db] 12 | [store :as store] 13 | [util :as util :refer [meh 14 | pprint-str 15 | random-nonempty-subset 16 | with-thread-name]]] 17 | [jepsen.control [net :as cn] 18 | [util :as cu]] 19 | [jepsen.lazyfs :as lazyfs] 20 | [jepsen.os.debian :as debian] 21 | [jepsen.mongodb [client :as client :refer [Conn 22 | host 23 | port]]] 24 | [slingshot.slingshot :refer [try+ throw+]]) 25 | (:import (java.lang Process 26 | ProcessBuilder 27 | ProcessBuilder$Redirect) 28 | (java.io File 29 | IOException 30 | OutputStreamWriter 31 | Writer) 32 | (java.util.concurrent TimeUnit))) 33 | 34 | (def proxy-listener-count 35 | "How many proxy listener ports do we bind?" 36 | 3) 37 | 38 | (defmacro io-thread 39 | "Stolen from Maelstrom. 40 | 41 | Spawns an IO thread for a process. Takes a running? atom, a thread name (e.g. 42 | \"stdin\"), [sym closable-expression ...] bindings (for with-open), a single 43 | loop-recur binding, and a body. Spawns a future, holding the closeable open, 44 | evaluating body in the loop-recur bindings as long as `running?` is true, and 45 | catching/logging exceptions. Body should return the next value for the loop 46 | iteration, or `nil` to terminate." 47 | [running? thread-type open-bindings loop-binding & body] 48 | `(future 49 | (with-thread-name (str "proxy " ~thread-type) 50 | (try 51 | (with-open ~open-bindings 52 | ; There is technically a race condition here: we might be 53 | ; interrupted during evaluation of the loop bindings, *before* we 54 | ; enter the try. Hopefully infrequent. If it happens, not the end of 55 | ; the world; just yields a confusing error message, maybe some weird 56 | ; premature closed-stream behavior. 57 | (loop ~loop-binding 58 | (if-not (deref ~running?) 59 | ; We're done 60 | :done 61 | (recur (try ~@body 62 | (catch IOException e# 63 | ; If the process crashes, we're going to hit 64 | ; IOExceptions trying to write/read streams. 65 | ; That's fine--we're going to learn about crashes 66 | ; when the process shutdown code checks the exit 67 | ; status. 68 | ) 69 | (catch InterruptedException e# 70 | ; We might be interrupted if setup fails, but it's 71 | ; not our job to exit here--we need to keep the 72 | ; process's streams up and running so we can tell 73 | ; if it terminated normally. We'll be terminated 74 | ; by the DB teardown process. 75 | ) 76 | (catch Throwable t# 77 | (warn t# "Error!") 78 | nil)))))) 79 | (catch IOException e# 80 | ; with-open is going to try to close things like OutputWriters, 81 | ; which will actually throw if the process has crashed, because they 82 | ; try to flush the underlying stream buffer, and THAT's closed. We 83 | ; ignore that too; the process shutdown code will alert the user. 84 | :crashed 85 | ))))) 86 | 87 | (defn journal-thread 88 | "Starts a thread which copies :stdout or :stderr to a file." 89 | [^Process process running? type ^Writer log] 90 | (io-thread running? (name type) 91 | [] 92 | [lines (bs/to-line-seq (case type 93 | :stderr (.getErrorStream process) 94 | :stdout (.getInputStream process)))] 95 | (when (seq lines) 96 | (let [line (first lines)] 97 | ; (info "Logging" type line) 98 | (locking log 99 | (.write log line) 100 | (.write log "\n") 101 | (.flush log)) 102 | (next lines))))) 103 | 104 | (defn start! 105 | "Starts the proxy locally for a test." 106 | [test db node] 107 | (let [proxy (:local-proxy test) 108 | bin (if (re-find #"/" proxy) 109 | proxy 110 | (str "./" proxy)) 111 | args ["-mongo-upstream" (str (cn/ip node) ":" (port db test)) 112 | "-listener-count" proxy-listener-count] 113 | _ (info "Launching" bin args) 114 | process (.. (ProcessBuilder. ^java.util.List (map str (cons bin args))) 115 | (redirectOutput ProcessBuilder$Redirect/PIPE) 116 | (redirectInput ProcessBuilder$Redirect/PIPE) 117 | (start)) 118 | running? (atom true) 119 | log (io/writer (store/path! test "proxy.log")) 120 | stdout-thread (journal-thread process running? :stdout log) 121 | stderr-thread (journal-thread process running? :stderr log)] 122 | {:process process 123 | :running? running? 124 | :stdout-thread stdout-thread 125 | :stderr-thread stderr-thread 126 | :log log})) 127 | 128 | (defn stop! 129 | "Stops the local proxy. Takes the same map returned by start!" 130 | [{:keys [^Process process running? stdout-thread stderr-thread ^Writer log]}] 131 | (let [crashed? (not (.isAlive process))] 132 | (when-not crashed? 133 | ; Kill 134 | (.. process destroyForcibly (waitFor 5 (TimeUnit/SECONDS)))) 135 | 136 | ; Shut down workers 137 | (reset! running? false) 138 | @stdout-thread 139 | @stderr-thread 140 | 141 | ; Close log file 142 | (.flush log) 143 | (.close log) 144 | 145 | (when crashed? 146 | (throw+ {:type ::crashed 147 | :exit (.exitValue process)} 148 | nil 149 | (str "Local proxy crashed with exit status " 150 | (.exitValue process) 151 | ". Logs are available in store/current/proxy.log."))))) 152 | 153 | (defrecord LocalProxyDB [db proxy] 154 | Conn 155 | (host [_ test node] 156 | "localhost") 157 | 158 | (port [_ test] 159 | (+ client/proxy-port (rand-int proxy-listener-count))) 160 | 161 | db/DB 162 | (setup! [this test node] 163 | (db/setup! db test node) 164 | (when (and (:local-proxy test) 165 | (= node (jepsen/primary test))) 166 | (deliver proxy (start! test db node)) 167 | (with-open [conn (client/await-open node test)] 168 | ; Huh, it doesn't SUPPORT rs_status. Weird. 169 | ;(info "Proxy reports rs_status" 170 | ; (pprint-str 171 | ; (client/admin-command! conn {:replSetGetStatus 1}) 172 | ))) 173 | 174 | (teardown! [this test node] 175 | (db/teardown! db test node) 176 | (when (and (realized? proxy) 177 | (= node (jepsen/primary test))) 178 | (stop! @proxy))) 179 | 180 | db/Primary 181 | (setup-primary! [_ test node] 182 | (db/setup-primary! db test node)) 183 | 184 | (primaries [_ test] 185 | (db/primaries db test)) 186 | 187 | db/LogFiles 188 | (log-files [_ test node] 189 | (db/log-files db test node)) 190 | 191 | ; We don't bother injecting faults into the proxy itself 192 | db/Process 193 | (start! [_ test node] 194 | (db/start! db test node)) 195 | 196 | (kill! [_ test node] 197 | (db/kill! db test node)) 198 | 199 | db/Pause 200 | (pause! [_ test node] 201 | (db/pause! db test node)) 202 | 203 | (resume! [this test node] 204 | (db/resume! db test node))) 205 | 206 | (defn db 207 | "Constructs a LocalProxy DB wrapping another DB." 208 | [db] 209 | (LocalProxyDB. db (promise))) 210 | -------------------------------------------------------------------------------- /src/jepsen/mongodb.clj: -------------------------------------------------------------------------------- 1 | (ns jepsen.mongodb 2 | "Constructs tests and handles CLI arguments" 3 | (:require [clojure.tools.logging :refer [info warn]] 4 | [clojure [string :as str] 5 | [pprint :refer [pprint]]] 6 | [jepsen [cli :as cli] 7 | [checker :as checker] 8 | [tests :as tests] 9 | [util :as util]] 10 | [jepsen.os.debian :as debian] 11 | [jepsen.generator :as gen] 12 | [jepsen.mongodb [db :as db] 13 | [list-append :as list-append] 14 | [list-append-multi-node :as list-append-multi-node] 15 | [nemesis :as nemesis]])) 16 | 17 | (def workloads 18 | {:list-append list-append/workload 19 | :list-append-multi-node list-append-multi-node/workload 20 | :none (fn [_] tests/noop-test)}) 21 | 22 | (def all-workloads 23 | "A collection of workloads we run by default." 24 | (remove #{:none} (keys workloads))) 25 | 26 | (def workloads-expected-to-pass 27 | "A collection of workload names which we expect should actually pass." 28 | (remove #{} all-workloads)) 29 | 30 | (def all-nemeses 31 | "Combinations of nemeses for tests" 32 | [[] 33 | [:partition] 34 | [:kill] 35 | [:pause] 36 | [:pause :kill :partition :clock :member]]) 37 | 38 | (def special-nemeses 39 | "A map of special nemesis names to collections of faults" 40 | {:none [] 41 | :all [:pause :kill :partition :clock :member]}) 42 | 43 | (defn parse-nemesis-spec 44 | "Takes a comma-separated nemesis string and returns a collection of keyword 45 | faults." 46 | [spec] 47 | (->> (str/split spec #",") 48 | (map keyword) 49 | (mapcat #(get special-nemeses % [%])))) 50 | 51 | (def logging-overrides 52 | "Custom log levels; Mongo's driver is... communicative" 53 | {;"jepsen.mongodb.client" :error 54 | "org.mongodb.driver.client" :error 55 | "org.mongodb.driver.cluster" :error 56 | "org.mongodb.driver.connection" :error}) 57 | 58 | (defn mongodb-test 59 | "Given an options map from the command line runner (e.g. :nodes, :ssh, 60 | :concurrency, ...), constructs a test map." 61 | [opts] 62 | (let [workload-name (:workload opts) 63 | workload ((workloads workload-name) opts) 64 | db (db/db opts) 65 | nemesis (nemesis/nemesis-package 66 | {:db db 67 | :sharded (:sharded opts) 68 | :nodes (:nodes opts) 69 | :faults (:nemesis opts) 70 | ;:partition {:targets [:primaries]} 71 | ;:pause {:targets [:primaries]} 72 | ;:kill {:targets [:all]} 73 | :pause {:targets [nil :one :primaries :majority :all]} 74 | :kill {:targets [nil :one :primaries :majority :all]} 75 | :interval (:nemesis-interval opts)})] 76 | (merge tests/noop-test 77 | opts 78 | {:name (str "mongodb " (:version opts) 79 | " " (name workload-name) 80 | (when-let [w (:write-concern opts)] (str " w:" w)) 81 | (when-let [r (:read-concern opts)] (str " r:" r)) 82 | (when-let [w (:txn-write-concern opts)] (str " tw:" w)) 83 | (when-let [r (:txn-read-concern opts)] (str " tr:" r)) 84 | (let [j (:journal opts)] 85 | (when-not (nil? j) 86 | (str " j:" j))) 87 | (when (:singleton-txns opts) " singleton-txns") 88 | (when (:proxy opts) " proxy") 89 | (when (:local-proxy opts) " local-proxy") 90 | " " (str/join "," (map name (:nemesis opts)))) 91 | :pure-generators true 92 | :logging {:overrides logging-overrides} 93 | :os debian/os 94 | :db db 95 | :checker (checker/compose 96 | {:perf (checker/perf 97 | {:nemeses (:perf nemesis)}) 98 | :clock (checker/clock-plot) 99 | :stats (checker/stats) 100 | :exceptions (checker/unhandled-exceptions) 101 | :workload (:checker workload) 102 | :crash (checker/log-file-pattern 103 | #"[Ff]atal" "mongod.log")}) 104 | :client (:client workload) 105 | :nemesis (:nemesis nemesis) 106 | :generator (gen/phases 107 | (->> (:generator workload) 108 | (gen/stagger (/ (:rate opts))) 109 | (gen/nemesis (gen/phases 110 | (gen/sleep 10) 111 | (:generator nemesis))) 112 | (gen/time-limit (:time-limit opts))))}))) 113 | 114 | (def cli-opts 115 | "Additional CLI options" 116 | [[nil "--hidden NUM" "Number of hidden replicas per replica set." 117 | :parse-fn parse-long 118 | :default 0 119 | :validate [(complement neg?) "Must be non-negative"]] 120 | 121 | [nil "--[no-]journal" "Force journaling for write concerns to be either enabled or disabled. If unset, leaves journaling at the default."] 122 | 123 | [nil "--[no-]lazyfs" "Mounts the MongoDB data dir in a lazyfs, and drops the page cache on process kill." 124 | :default false] 125 | 126 | [nil "--local-proxy BIN" "A proxy binary to run locally and make requests to."] 127 | 128 | [nil "--max-txn-length NUM" "Maximum number of operations in a transaction." 129 | :default 4 130 | :parse-fn parse-long 131 | :validate [pos? "Must be a positive integer"]] 132 | 133 | [nil "--max-writes-per-key NUM" "Maximum number of writes to any given key." 134 | :default 256 135 | :parse-fn parse-long 136 | :validate [pos? "Must be a positive integer."]] 137 | 138 | [nil "--nemesis FAULTS" "A comma-separated list of nemesis faults to enable" 139 | :parse-fn parse-nemesis-spec 140 | :validate [(partial every? #{:pause :kill :partition :clock :member}) 141 | "Faults must be pause, kill, partition, clock, or member, or the special faults all or none."]] 142 | 143 | 144 | [nil "--nemesis-interval SECS" "Roughly how long between nemesis operations." 145 | :default 2 146 | :parse-fn read-string 147 | :validate [pos? "Must be a positive integer."]] 148 | 149 | [nil "--no-read-only-txn-write-concern" "Don't set write concern on read-only transactions" 150 | :default false] 151 | 152 | ["-p" "--proxy BIN" "A proxy binary to upload to each node and make requests to."] 153 | 154 | ["-r" "--rate HZ" "Approximate number of requests per second, total" 155 | :default 1000 156 | :parse-fn read-string 157 | :validate [#(and (number? %) (pos? %)) "Must be a positive number"]] 158 | 159 | [nil "--read-concern LEVEL" "What level of read concern to use." 160 | :default nil] 161 | 162 | [nil "--read-preference LEVEL" "What read preference to use (e.g. 'secondary')" 163 | :default nil] 164 | 165 | [nil "--[no-]retry-writes" "Explicitly enables or disables retryable writes at the client level."] 166 | 167 | [nil "--repro-48307" "If set, tries to generate transactions which specifically trigger SERVER-48307, which manifested in version 4.2.6"] 168 | 169 | [nil "--sharded" "If set, set up a multi-shard MongoDB fronted by Mongos." 170 | :default false] 171 | 172 | ;[nil "--shard-key KEY" "Either `id` or `value`" 173 | ; :default :id 174 | ; :parse-fn keyword 175 | ; :validate [#{:id :value} "Must be either `id` or `value`"]] 176 | 177 | [nil "--singleton-txns" "If set, execute even single operations in a transactional context." 178 | :default false] 179 | 180 | [nil "--txn-read-concern LEVEL" "What level of read concern should we use in transactions?"] 181 | 182 | [nil "--txn-write-concern LEVEL" "What level of write concern should we use in transactions?"] 183 | 184 | ["-v" "--version STRING" "What version of MongoDB should we test?" 185 | :default "7.0.0-rc8"] 186 | 187 | ["-w" "--workload NAME" "What workload should we run?" 188 | :parse-fn keyword 189 | :default :list-append 190 | :validate [workloads (cli/one-of workloads)]] 191 | 192 | [nil "--write-concern LEVEL" "What level of write concern to use." 193 | :default nil] 194 | ]) 195 | 196 | (defn all-test-options 197 | "Takes base cli options, a collection of nemeses, workloads, and a test count, 198 | and constructs a sequence of test options." 199 | [cli nemeses workloads] 200 | (for [n nemeses, w workloads, i (range (:test-count cli))] 201 | (assoc cli 202 | :nemesis n 203 | :workload w))) 204 | 205 | (defn all-tests 206 | "Turns CLI options into a sequence of tests." 207 | [test-fn cli] 208 | (let [nemeses (if-let [n (:nemesis cli)] [n] all-nemeses) 209 | workloads (if-let [w (:workload cli)] [w] 210 | (if (:only-workloads-expected-to-pass cli) 211 | workloads-expected-to-pass 212 | all-workloads))] 213 | (->> (all-test-options cli nemeses workloads) 214 | (map test-fn)))) 215 | 216 | (defn -main 217 | "Handles command line arguments. Can either run a test, or a web server for 218 | browsing results." 219 | [& args] 220 | (cli/run! (merge (cli/single-test-cmd {:test-fn mongodb-test 221 | :opt-spec cli-opts}) 222 | (cli/test-all-cmd {:tests-fn (partial all-tests mongodb-test) 223 | :opt-spec cli-opts}) 224 | (cli/serve-cmd)) 225 | args)) 226 | -------------------------------------------------------------------------------- /src/jepsen/mongodb/list_append.clj: -------------------------------------------------------------------------------- 1 | (ns jepsen.mongodb.list-append 2 | "Elle list append workload" 3 | (:require [clojure [pprint :refer [pprint]]] 4 | [clojure.tools.logging :refer [info warn]] 5 | [dom-top.core :refer [loopr 6 | with-retry]] 7 | [elle.list-append :as elle.list-append] 8 | [jepsen [client :as client] 9 | [checker :as checker] 10 | [generator :as gen] 11 | [util :as util :refer [timeout 12 | map-vals]]] 13 | [jepsen.tests.cycle :as cycle] 14 | [jepsen.tests.cycle.append :as list-append] 15 | [jepsen.mongodb [client :as c]] 16 | [slingshot.slingshot :as slingshot]) 17 | (:import (java.util.concurrent TimeUnit) 18 | (com.mongodb MongoCommandException 19 | MongoNodeIsRecoveringException 20 | MongoNotPrimaryException 21 | TransactionOptions 22 | ReadConcern 23 | ReadPreference 24 | WriteConcern) 25 | (com.mongodb.client.model Filters 26 | UpdateOptions))) 27 | 28 | (def db-name "jepsendb") 29 | (def coll-name "jepsencoll") 30 | 31 | (defn txn-options 32 | "Constructs options for this transaction." 33 | [test txn] 34 | ; Transactions retry for well over 100 seconds and I cannot for the life of 35 | ; me find what switch makes that timeout shorter. MaxCommitTime only affects 36 | ; a *single* invocation of the transaction, not the retries. We work around 37 | ; this by timing out in Jepsen as well. 38 | (cond-> (TransactionOptions/builder) 39 | true (.maxCommitTime 5 TimeUnit/SECONDS) 40 | 41 | ; MongoDB *ignores* the DB and collection-level read and write concerns 42 | ; within a transaction, which seems... bad, because it actually 43 | ; *downgrades* safety if you chose high levels at the db or collection 44 | ; levels! We have to set them here too. 45 | (:txn-read-concern test) 46 | (.readConcern (c/read-concern (:txn-read-concern test))) 47 | 48 | (and (:txn-write-concern test) 49 | ; If the transaction is read-only, and we have 50 | ; no-read-only-txn-write-concern set, we don't bother setting the write 51 | ; concern. 52 | (not (and (every? (comp #{:r} first) txn) 53 | (:no-read-only-txn-write-concern test)))) 54 | (.writeConcern (c/write-concern (:txn-write-concern test) (:journal test))) 55 | 56 | ; Read preferences must always be primary; no sense in setting a pref here 57 | true .build)) 58 | 59 | (defn apply-mop! 60 | "Applies a transactional micro-operation to a connection." 61 | [test db session [f k v :as mop]] 62 | (let [coll (c/collection db coll-name)] 63 | ;(info (with-out-str 64 | ; (println "db levels") 65 | ; (prn :sn-rc ReadConcern/SNAPSHOT) 66 | ; (prn :ma-rc ReadConcern/MAJORITY) 67 | ; (prn :db-rc (.getReadConcern db)) 68 | ; (prn :ma-wc WriteConcern/MAJORITY) 69 | ; (prn :db-wc (.getWriteConcern db)))) 70 | (case f 71 | :r [f k (vec (:value (c/find-one coll session k)))] 72 | :append (let [filt (Filters/eq "_id" k) 73 | doc (c/->doc {:$push {:value v}}) 74 | opts (.. (UpdateOptions.) (upsert true)) 75 | res (if session 76 | (.updateOne coll session filt doc opts) 77 | (.updateOne coll filt doc opts))] 78 | ;(info :res res) 79 | mop)))) 80 | 81 | (defn create-coll! 82 | "Creates and optionally shards the collection for this test." 83 | [test conn] 84 | (with-retry [tries 5] 85 | (let [db (c/db conn db-name test)] 86 | (when (:sharded test) 87 | (c/admin-command! conn {:enableSharding db-name})) 88 | 89 | (info "creating collection") 90 | (let [coll (c/create-collection! db coll-name)] 91 | (info "Collection created") 92 | (when (:sharded test) 93 | ; Shard it! 94 | (c/admin-command! conn 95 | {:shardCollection (str db-name "." coll-name) 96 | :key {:_id :hashed} 97 | ; WIP; gotta figure out how we're going to 98 | ; generate queries with the shard key in them. 99 | ;:key {(case (:shard-key test) 100 | ; :id :_id 101 | ; :value :value) 102 | ; :hashed} 103 | :numInitialChunks 7}) 104 | (info "Collection sharded")))) 105 | (catch MongoNotPrimaryException e 106 | ; sigh, why is this a thing 107 | (info "Ignoring MongoNotPrimaryException") 108 | nil) 109 | (catch MongoNodeIsRecoveringException e 110 | (info "Caught MongoNodeIsRecoveringException" tries (.getMessage e)) 111 | (if (pos? tries) 112 | (do (info "Couldn't create collection:" (.getMessage e) " - retrying") 113 | (Thread/sleep 5000) 114 | (retry (dec tries))) 115 | (throw e))) 116 | (catch com.mongodb.MongoSocketReadTimeoutException e 117 | (if (pos? tries) 118 | (do (info "Timed out sharding DB and creating collection; waiting to retry") 119 | (Thread/sleep 5000) 120 | (retry (dec tries))) 121 | (throw e))) 122 | (catch MongoCommandException e 123 | (condp re-find (.getMessage e) 124 | #"Collection already exists" nil 125 | (throw e))))) 126 | 127 | (defrecord Client [conn] 128 | client/Client 129 | (open! [this test node] 130 | (assoc this :conn (c/open node test))) 131 | 132 | (setup! [this test] 133 | (create-coll! test conn)) 134 | 135 | (invoke! [this test op] 136 | (let [txn (:value op)] 137 | (c/with-errors op 138 | (timeout 5000 (assoc op :type :info, :error :timeout) 139 | (let [db (c/db conn db-name test) 140 | txn' (if (and (<= (count txn) 1) 141 | (not (:singleton-txns test))) 142 | ; We can run without a transaction 143 | [(apply-mop! test db nil (first txn))] 144 | 145 | ; We need a transaction 146 | (with-open [session (c/start-session conn)] 147 | (let [opts (txn-options test txn) 148 | body (c/txn 149 | ;(info :txn-begins) 150 | (mapv (partial apply-mop! 151 | test db session) 152 | (:value op)))] 153 | (.withTransaction session body opts))))] 154 | (assoc op :type :ok, :value txn')))))) 155 | 156 | (teardown! [this test]) 157 | 158 | (close! [this test] 159 | (c/close! conn))) 160 | 161 | (defn divergence-stats-checker 162 | "A checker which tries to estimate the fraction of writes which are lost to 163 | replica divergence. 164 | 165 | TODO: this is not very good. We use the longest value observed for a key as 166 | authoritative, but often Mongo loses a long value and replaces it with a 167 | shorter one, which causes us to undercount divergence. We could try to pick 168 | the *last* value observed, but of course that's not perfectly rigorous 169 | either..." 170 | [] 171 | (reify checker/Checker 172 | (check [this test history opts] 173 | ; Build up a map of keys to the final observed values for those keys.sorted distinct observed values of that key. 174 | (let [sorted-values (->> history 175 | (remove (comp #{:nemesis} :process)) 176 | elle.list-append/sorted-values)] 177 | (loopr [longest (transient {}) 178 | diverged (transient {})] 179 | [[k values] sorted-values] 180 | ; Find the longest value 181 | (let [longest-k (last values)] 182 | (recur 183 | (assoc! longest k longest-k) 184 | ; Now zip through each value and record every value which 185 | ; diverged from the longest version. 186 | (loopr [diverged diverged] 187 | [value values] 188 | ; And for each element... 189 | (recur 190 | (loopr [i 0 191 | diverged diverged] 192 | [element value] 193 | (let [expected (nth longest-k i)] 194 | (recur (inc i) 195 | (if (= element expected) 196 | diverged 197 | (let [dk (-> diverged 198 | (get k #{}) 199 | (conj element))] 200 | (assoc! diverged k dk))))) 201 | diverged))))) 202 | ; Great, now that we have the diverged values and longest values 203 | ; for k, compute stats 204 | (let [longest (persistent! longest) 205 | diverged (persistent! diverged) 206 | ; How many observed values in the longest values, across 207 | ; all keys? Note that we're ignoring duplicates here. 208 | longest-count (->> longest 209 | vals 210 | (map (comp count set)) 211 | (reduce + 0)) 212 | ; How many divergent values? 213 | div-count (->> diverged 214 | vals 215 | (map count) 216 | (reduce + 0))] 217 | {:valid? (zero? div-count) 218 | :longest-count longest-count 219 | :diverged-count div-count 220 | :diverged-frac (if (zero? (+ div-count longest-count)) 221 | 0 222 | (float (/ div-count 223 | (+ div-count longest-count)))) 224 | ;:longest longest 225 | ;:diverged diverged 226 | })))))) 227 | 228 | (defn gen-48307 229 | "A generator variant which specifically targets 230 | https://jira.mongodb.org/browse/SERVER-48307. We produce single writes and a 231 | plethora of reads." 232 | [opts] 233 | (->> (list-append/gen {; We want a decent chance of choosing writes and reads 234 | ; to *different* shards; the exponential approach will 235 | ; concentrate most ops on one shard. 236 | :key-dist :uniform 237 | :min-txn-length 4 238 | :max-txn-length (:max-txn-length opts 4) 239 | ; The default here, 3, wouldn't give us a decent 240 | ; chance of picking keys from different shards. 241 | :key-count 7 242 | :max-writes-per-key (:max-writes-per-key opts)}) 243 | ; Ensure every txn writes something 244 | (gen/filter (fn ensure-a-write [op] 245 | (->> (:value op) 246 | (map first) 247 | (some #{:append})))) 248 | ; Ensure every txn writes to exactly one key 249 | (gen/map (fn rewrite [op] 250 | (let [txn (:value op) 251 | ; Which key will we write? 252 | write-key (->> txn 253 | (keep (fn [[f k v]] 254 | (when (= :append f) 255 | k))) 256 | distinct 257 | rand-nth) 258 | ; Rewrite other writes to reads 259 | txn' (mapv (fn [[f k v :as mop]] 260 | (if (and (= f :append) 261 | (not= k write-key)) 262 | [:r k nil] 263 | mop)) 264 | txn)] 265 | (assoc op :value txn')))))) 266 | 267 | (defn workload 268 | "A generator, client, and checker for a list-append test." 269 | [opts] 270 | (-> (list-append/test {:key-count 10 271 | :key-dist :exponential 272 | ;:key-dist :uniform 273 | :max-txn-length (:max-txn-length opts 4) 274 | :max-writes-per-key (:max-writes-per-key opts) 275 | :consistency-models [:strong-snapshot-isolation] 276 | :cycle-search-timeout 1000}) 277 | (assoc :client (Client. nil)) 278 | (update :checker (fn [c] 279 | (checker/compose 280 | {:elle c 281 | :divergence (divergence-stats-checker)}))) 282 | (cond-> 283 | (:repro-48307 opts) (assoc :generator (gen-48307 opts))))) 284 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Eclipse Public License - v 2.0 2 | 3 | THE ACCOMPANYING PROGRAM IS PROVIDED UNDER THE TERMS OF THIS ECLIPSE 4 | PUBLIC LICENSE ("AGREEMENT"). ANY USE, REPRODUCTION OR DISTRIBUTION 5 | OF THE PROGRAM CONSTITUTES RECIPIENT'S ACCEPTANCE OF THIS AGREEMENT. 6 | 7 | 1. DEFINITIONS 8 | 9 | "Contribution" means: 10 | 11 | a) in the case of the initial Contributor, the initial content 12 | Distributed under this Agreement, and 13 | 14 | b) in the case of each subsequent Contributor: 15 | i) changes to the Program, and 16 | ii) additions to the Program; 17 | where such changes and/or additions to the Program originate from 18 | and are Distributed by that particular Contributor. A Contribution 19 | "originates" from a Contributor if it was added to the Program by 20 | such Contributor itself or anyone acting on such Contributor's behalf. 21 | Contributions do not include changes or additions to the Program that 22 | are not Modified Works. 23 | 24 | "Contributor" means any person or entity that Distributes the Program. 25 | 26 | "Licensed Patents" mean patent claims licensable by a Contributor which 27 | are necessarily infringed by the use or sale of its Contribution alone 28 | or when combined with the Program. 29 | 30 | "Program" means the Contributions Distributed in accordance with this 31 | Agreement. 32 | 33 | "Recipient" means anyone who receives the Program under this Agreement 34 | or any Secondary License (as applicable), including Contributors. 35 | 36 | "Derivative Works" shall mean any work, whether in Source Code or other 37 | form, that is based on (or derived from) the Program and for which the 38 | editorial revisions, annotations, elaborations, or other modifications 39 | represent, as a whole, an original work of authorship. 40 | 41 | "Modified Works" shall mean any work in Source Code or other form that 42 | results from an addition to, deletion from, or modification of the 43 | contents of the Program, including, for purposes of clarity any new file 44 | in Source Code form that contains any contents of the Program. Modified 45 | Works shall not include works that contain only declarations, 46 | interfaces, types, classes, structures, or files of the Program solely 47 | in each case in order to link to, bind by name, or subclass the Program 48 | or Modified Works thereof. 49 | 50 | "Distribute" means the acts of a) distributing or b) making available 51 | in any manner that enables the transfer of a copy. 52 | 53 | "Source Code" means the form of a Program preferred for making 54 | modifications, including but not limited to software source code, 55 | documentation source, and configuration files. 56 | 57 | "Secondary License" means either the GNU General Public License, 58 | Version 2.0, or any later versions of that license, including any 59 | exceptions or additional permissions as identified by the initial 60 | Contributor. 61 | 62 | 2. GRANT OF RIGHTS 63 | 64 | a) Subject to the terms of this Agreement, each Contributor hereby 65 | grants Recipient a non-exclusive, worldwide, royalty-free copyright 66 | license to reproduce, prepare Derivative Works of, publicly display, 67 | publicly perform, Distribute and sublicense the Contribution of such 68 | Contributor, if any, and such Derivative Works. 69 | 70 | b) Subject to the terms of this Agreement, each Contributor hereby 71 | grants Recipient a non-exclusive, worldwide, royalty-free patent 72 | license under Licensed Patents to make, use, sell, offer to sell, 73 | import and otherwise transfer the Contribution of such Contributor, 74 | if any, in Source Code or other form. This patent license shall 75 | apply to the combination of the Contribution and the Program if, at 76 | the time the Contribution is added by the Contributor, such addition 77 | of the Contribution causes such combination to be covered by the 78 | Licensed Patents. The patent license shall not apply to any other 79 | combinations which include the Contribution. No hardware per se is 80 | licensed hereunder. 81 | 82 | c) Recipient understands that although each Contributor grants the 83 | licenses to its Contributions set forth herein, no assurances are 84 | provided by any Contributor that the Program does not infringe the 85 | patent or other intellectual property rights of any other entity. 86 | Each Contributor disclaims any liability to Recipient for claims 87 | brought by any other entity based on infringement of intellectual 88 | property rights or otherwise. As a condition to exercising the 89 | rights and licenses granted hereunder, each Recipient hereby 90 | assumes sole responsibility to secure any other intellectual 91 | property rights needed, if any. For example, if a third party 92 | patent license is required to allow Recipient to Distribute the 93 | Program, it is Recipient's responsibility to acquire that license 94 | before distributing the Program. 95 | 96 | d) Each Contributor represents that to its knowledge it has 97 | sufficient copyright rights in its Contribution, if any, to grant 98 | the copyright license set forth in this Agreement. 99 | 100 | e) Notwithstanding the terms of any Secondary License, no 101 | Contributor makes additional grants to any Recipient (other than 102 | those set forth in this Agreement) as a result of such Recipient's 103 | receipt of the Program under the terms of a Secondary License 104 | (if permitted under the terms of Section 3). 105 | 106 | 3. REQUIREMENTS 107 | 108 | 3.1 If a Contributor Distributes the Program in any form, then: 109 | 110 | a) the Program must also be made available as Source Code, in 111 | accordance with section 3.2, and the Contributor must accompany 112 | the Program with a statement that the Source Code for the Program 113 | is available under this Agreement, and informs Recipients how to 114 | obtain it in a reasonable manner on or through a medium customarily 115 | used for software exchange; and 116 | 117 | b) the Contributor may Distribute the Program under a license 118 | different than this Agreement, provided that such license: 119 | i) effectively disclaims on behalf of all other Contributors all 120 | warranties and conditions, express and implied, including 121 | warranties or conditions of title and non-infringement, and 122 | implied warranties or conditions of merchantability and fitness 123 | for a particular purpose; 124 | 125 | ii) effectively excludes on behalf of all other Contributors all 126 | liability for damages, including direct, indirect, special, 127 | incidental and consequential damages, such as lost profits; 128 | 129 | iii) does not attempt to limit or alter the recipients' rights 130 | in the Source Code under section 3.2; and 131 | 132 | iv) requires any subsequent distribution of the Program by any 133 | party to be under a license that satisfies the requirements 134 | of this section 3. 135 | 136 | 3.2 When the Program is Distributed as Source Code: 137 | 138 | a) it must be made available under this Agreement, or if the 139 | Program (i) is combined with other material in a separate file or 140 | files made available under a Secondary License, and (ii) the initial 141 | Contributor attached to the Source Code the notice described in 142 | Exhibit A of this Agreement, then the Program may be made available 143 | under the terms of such Secondary Licenses, and 144 | 145 | b) a copy of this Agreement must be included with each copy of 146 | the Program. 147 | 148 | 3.3 Contributors may not remove or alter any copyright, patent, 149 | trademark, attribution notices, disclaimers of warranty, or limitations 150 | of liability ("notices") contained within the Program from any copy of 151 | the Program which they Distribute, provided that Contributors may add 152 | their own appropriate notices. 153 | 154 | 4. COMMERCIAL DISTRIBUTION 155 | 156 | Commercial distributors of software may accept certain responsibilities 157 | with respect to end users, business partners and the like. While this 158 | license is intended to facilitate the commercial use of the Program, 159 | the Contributor who includes the Program in a commercial product 160 | offering should do so in a manner which does not create potential 161 | liability for other Contributors. Therefore, if a Contributor includes 162 | the Program in a commercial product offering, such Contributor 163 | ("Commercial Contributor") hereby agrees to defend and indemnify every 164 | other Contributor ("Indemnified Contributor") against any losses, 165 | damages and costs (collectively "Losses") arising from claims, lawsuits 166 | and other legal actions brought by a third party against the Indemnified 167 | Contributor to the extent caused by the acts or omissions of such 168 | Commercial Contributor in connection with its distribution of the Program 169 | in a commercial product offering. The obligations in this section do not 170 | apply to any claims or Losses relating to any actual or alleged 171 | intellectual property infringement. In order to qualify, an Indemnified 172 | Contributor must: a) promptly notify the Commercial Contributor in 173 | writing of such claim, and b) allow the Commercial Contributor to control, 174 | and cooperate with the Commercial Contributor in, the defense and any 175 | related settlement negotiations. The Indemnified Contributor may 176 | participate in any such claim at its own expense. 177 | 178 | For example, a Contributor might include the Program in a commercial 179 | product offering, Product X. That Contributor is then a Commercial 180 | Contributor. If that Commercial Contributor then makes performance 181 | claims, or offers warranties related to Product X, those performance 182 | claims and warranties are such Commercial Contributor's responsibility 183 | alone. Under this section, the Commercial Contributor would have to 184 | defend claims against the other Contributors related to those performance 185 | claims and warranties, and if a court requires any other Contributor to 186 | pay any damages as a result, the Commercial Contributor must pay 187 | those damages. 188 | 189 | 5. NO WARRANTY 190 | 191 | EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, AND TO THE EXTENT 192 | PERMITTED BY APPLICABLE LAW, THE PROGRAM IS PROVIDED ON AN "AS IS" 193 | BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, EITHER EXPRESS OR 194 | IMPLIED INCLUDING, WITHOUT LIMITATION, ANY WARRANTIES OR CONDITIONS OF 195 | TITLE, NON-INFRINGEMENT, MERCHANTABILITY OR FITNESS FOR A PARTICULAR 196 | PURPOSE. Each Recipient is solely responsible for determining the 197 | appropriateness of using and distributing the Program and assumes all 198 | risks associated with its exercise of rights under this Agreement, 199 | including but not limited to the risks and costs of program errors, 200 | compliance with applicable laws, damage to or loss of data, programs 201 | or equipment, and unavailability or interruption of operations. 202 | 203 | 6. DISCLAIMER OF LIABILITY 204 | 205 | EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, AND TO THE EXTENT 206 | PERMITTED BY APPLICABLE LAW, NEITHER RECIPIENT NOR ANY CONTRIBUTORS 207 | SHALL HAVE ANY LIABILITY FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 208 | EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING WITHOUT LIMITATION LOST 209 | PROFITS), HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 210 | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 211 | ARISING IN ANY WAY OUT OF THE USE OR DISTRIBUTION OF THE PROGRAM OR THE 212 | EXERCISE OF ANY RIGHTS GRANTED HEREUNDER, EVEN IF ADVISED OF THE 213 | POSSIBILITY OF SUCH DAMAGES. 214 | 215 | 7. GENERAL 216 | 217 | If any provision of this Agreement is invalid or unenforceable under 218 | applicable law, it shall not affect the validity or enforceability of 219 | the remainder of the terms of this Agreement, and without further 220 | action by the parties hereto, such provision shall be reformed to the 221 | minimum extent necessary to make such provision valid and enforceable. 222 | 223 | If Recipient institutes patent litigation against any entity 224 | (including a cross-claim or counterclaim in a lawsuit) alleging that the 225 | Program itself (excluding combinations of the Program with other software 226 | or hardware) infringes such Recipient's patent(s), then such Recipient's 227 | rights granted under Section 2(b) shall terminate as of the date such 228 | litigation is filed. 229 | 230 | All Recipient's rights under this Agreement shall terminate if it 231 | fails to comply with any of the material terms or conditions of this 232 | Agreement and does not cure such failure in a reasonable period of 233 | time after becoming aware of such noncompliance. If all Recipient's 234 | rights under this Agreement terminate, Recipient agrees to cease use 235 | and distribution of the Program as soon as reasonably practicable. 236 | However, Recipient's obligations under this Agreement and any licenses 237 | granted by Recipient relating to the Program shall continue and survive. 238 | 239 | Everyone is permitted to copy and distribute copies of this Agreement, 240 | but in order to avoid inconsistency the Agreement is copyrighted and 241 | may only be modified in the following manner. The Agreement Steward 242 | reserves the right to publish new versions (including revisions) of 243 | this Agreement from time to time. No one other than the Agreement 244 | Steward has the right to modify this Agreement. The Eclipse Foundation 245 | is the initial Agreement Steward. The Eclipse Foundation may assign the 246 | responsibility to serve as the Agreement Steward to a suitable separate 247 | entity. Each new version of the Agreement will be given a distinguishing 248 | version number. The Program (including Contributions) may always be 249 | Distributed subject to the version of the Agreement under which it was 250 | received. In addition, after a new version of the Agreement is published, 251 | Contributor may elect to Distribute the Program (including its 252 | Contributions) under the new version. 253 | 254 | Except as expressly stated in Sections 2(a) and 2(b) above, Recipient 255 | receives no rights or licenses to the intellectual property of any 256 | Contributor under this Agreement, whether expressly, by implication, 257 | estoppel or otherwise. All rights in the Program not expressly granted 258 | under this Agreement are reserved. Nothing in this Agreement is intended 259 | to be enforceable by any entity that is not a Contributor or Recipient. 260 | No third-party beneficiary rights are created under this Agreement. 261 | 262 | Exhibit A - Form of Secondary Licenses Notice 263 | 264 | "This Source Code may also be made available under the following 265 | Secondary Licenses when the conditions for such availability set forth 266 | in the Eclipse Public License, v. 2.0 are satisfied: {name license(s), 267 | version(s), and exceptions or additional permissions here}." 268 | 269 | Simply including a copy of this Agreement, including this Exhibit A 270 | is not sufficient to license the Source Code under Secondary Licenses. 271 | 272 | If it is not possible or desirable to put the notice in a particular 273 | file, then You may include the notice in a location (such as a LICENSE 274 | file in a relevant directory) where a recipient would be likely to 275 | look for such a notice. 276 | 277 | You may add additional accurate notices of copyright ownership. 278 | -------------------------------------------------------------------------------- /src/jepsen/mongodb/db.clj: -------------------------------------------------------------------------------- 1 | (ns jepsen.mongodb.db 2 | "Database setup and automation." 3 | (:require [clojure [pprint :refer [pprint]] 4 | [string :as str]] 5 | [clojure.java.io :as io] 6 | [clojure.tools.logging :refer [info warn]] 7 | [dom-top.core :refer [real-pmap]] 8 | [jepsen [control :as c] 9 | [core :as jepsen] 10 | [db :as db] 11 | [util :as util :refer [meh random-nonempty-subset sh]]] 12 | [jepsen.control [net :as cn] 13 | [util :as cu]] 14 | [jepsen.lazyfs :as lazyfs] 15 | [jepsen.os.debian :as debian] 16 | [jepsen.mongodb [client :as client :refer [Conn 17 | host 18 | port]]] 19 | [jepsen.mongodb.db [local-proxy :as local-proxy] 20 | [proxy :as proxy]] 21 | [slingshot.slingshot :refer [try+ throw+]])) 22 | 23 | (def log-file "/var/log/mongodb/mongod.log") 24 | (def data-dir "/var/lib/mongodb") 25 | (def user "mongodb") 26 | 27 | (def mongos-dir "/tmp/mongos") 28 | (def mongos-log-file "/var/log/mongodb/mongos.stdout") 29 | (def mongos-pid-file (str mongos-dir "/mongos.pid")) 30 | (def mongos-bin "mongos") 31 | 32 | (def subpackages 33 | "MongoDB has like five different packages to install; these are the ones we 34 | want." 35 | ["server" 36 | "mongos" 37 | "shell"]) 38 | 39 | (defn deb-url 40 | "What's the URL of the Debian package we install?" 41 | [test subpackage] 42 | (let [version (:version test) 43 | rc? (boolean (re-find #"-rc" version)) 44 | ; Versions like 1.2.3-rc2 need to become 1.2.3~rc2" 45 | version (str/replace version "-" "~") 46 | ; Mongo puts a "4.2" in the URL for "4.2.1", so we have to compute that 47 | ; too, except for rcs, which live in 'testing', I think? 48 | small-version (if rc? 49 | "testing" 50 | (re-find #"^\d+\.\d+" version))] 51 | (str "https://repo.mongodb.org/apt/debian/dists/buster/mongodb-org/" 52 | small-version "/main/binary-amd64/mongodb-org-" subpackage "_" 53 | version "_amd64.deb"))) 54 | 55 | (defn install! 56 | [test] 57 | "Installs MongoDB on the current node." 58 | (c/su 59 | ; Prereqs 60 | (c/exec :mkdir :-p "/tmp/jepsen") 61 | (c/cd "/tmp/jepsen" 62 | (doseq [subpackage subpackages] 63 | (when-not (= (:version test) 64 | (debian/installed-version (str "mongodb-org-" 65 | subpackage))) 66 | (let [file (cu/wget! (deb-url test subpackage))] 67 | (info "Installing" subpackage (:version test)) 68 | (c/exec :dpkg :-i :--force-confnew file)) 69 | (c/exec :systemctl :daemon-reload)))) 70 | ; We may have nuked the data dir if we ran with lazyfs earlier, so ensure 71 | ; it exists. 72 | (when-not (cu/exists? data-dir) 73 | (c/exec :mkdir :-p data-dir) 74 | (c/exec :chown (str user ":" user) data-dir)))) 75 | 76 | (defn config-server? 77 | "Takes a test map, and returns true iff this set of nodes is intended to be a 78 | configsvr." 79 | [test] 80 | (= (:replica-set-name test) "rs_config")) 81 | 82 | (defn configure! 83 | "Sets up configuration files" 84 | [test node] 85 | (c/su 86 | (c/exec :echo :> "/etc/mongod.conf" 87 | (-> (slurp (io/resource "mongod.conf")) 88 | (str/replace "%IP%" (cn/ip node)) 89 | (str/replace "%REPL_SET_NAME%" 90 | (:replica-set-name test "rs_jepsen")) 91 | (str/replace "%CLUSTER_ROLE%" 92 | (if (config-server? test) 93 | "configsvr" 94 | "shardsvr")))))) 95 | 96 | (defn start! 97 | "Starts mongod" 98 | [test node] 99 | (info "Starting mongod") 100 | (c/su (c/exec :systemctl :start :mongod))) 101 | 102 | (defn stop! 103 | "Stops the mongodb service" 104 | [test node] 105 | (info "Stopping mongod") 106 | (try+ 107 | (c/su (c/exec :systemctl :stop :mongod)) 108 | (catch [:exit 5] e 109 | ; Not loaded; we probably haven't installed 110 | ))) 111 | 112 | (defn wipe! 113 | "Removes logs and data files" 114 | [test node] 115 | (c/su (c/exec :rm :-rf log-file (c/lit (str data-dir "/*"))))) 116 | 117 | ;; Replica sets 118 | 119 | (defn target-replica-set-config 120 | "Generates the config for a replset in a given test." 121 | [test] 122 | (let [; Set aside some nodes as hidden replicas 123 | [hidden normal] (->> (:nodes test) 124 | reverse 125 | (split-at (:hidden test)) 126 | (map set))] 127 | {:_id (:replica-set-name test "rs_jepsen") 128 | :configsvr (config-server? test) 129 | ; See https://docs.mongodb.com/manual/reference/replica-configuration/#rsconf.settings.catchUpTimeoutMillis 130 | :settings {:heartbeatTimeoutSecs 1 131 | :electionTimeoutMillis 1000 132 | :catchUpTimeoutMillis 1000 133 | :catchUpTakeoverDelayMillis 3000} 134 | :members (->> test 135 | :nodes 136 | (map-indexed (fn [i node] 137 | {:_id i 138 | :priority (if (hidden node) 139 | 0 140 | (- (count (:nodes test)) i)) 141 | :votes (if (hidden node) 142 | 0 143 | 1) 144 | :hidden (boolean (hidden node)) 145 | :host (str node ":" 146 | (if (config-server? test) 147 | client/config-port 148 | client/shard-port))})))})) 149 | 150 | (defn replica-set-initiate! 151 | "Initialize a replica set on a node." 152 | [conn config] 153 | (client/admin-command! conn {:replSetInitiate config})) 154 | 155 | (defn replica-set-config 156 | "Returns the current repl set config" 157 | [conn] 158 | (client/admin-command! conn {:replSetConfig 1})) 159 | 160 | (defn replica-set-status 161 | "Returns the current replica set status." 162 | [conn] 163 | (client/admin-command! conn {:replSetGetStatus 1})) 164 | 165 | (defn primaries 166 | "What nodes does this conn think are primaries?" 167 | [conn] 168 | (->> (replica-set-status conn) 169 | :members 170 | (filter #(= "PRIMARY" (:stateStr %))) 171 | (map :name) 172 | (map client/addr->node))) 173 | 174 | (defn primary 175 | "Which single node does this conn think the primary is? Throws for multiple 176 | primaries, cuz that sounds like a fun and interesting bug, haha." 177 | [conn] 178 | (let [ps (primaries conn)] 179 | (when (< 1 (count ps)) 180 | (throw (IllegalStateException. 181 | (str "Multiple primaries known to " 182 | conn 183 | ": " 184 | ps)))) 185 | 186 | (first ps))) 187 | 188 | (defn await-join 189 | "Block until all nodes in the test are known to this connection's replset 190 | status" 191 | [test conn] 192 | (while (not= (set (:nodes test)) 193 | (->> (replica-set-status conn) 194 | :members 195 | (map :name) 196 | (map client/addr->node) 197 | set)) 198 | (info :replica-set-status 199 | (with-out-str (->> (replica-set-status conn) 200 | :members 201 | (map :name) 202 | (map client/addr->node) 203 | sort 204 | pprint) 205 | (prn :test (sort (:nodes test))))) 206 | (Thread/sleep 1000))) 207 | 208 | (defn await-primary 209 | "Block until a primary is known to the current node." 210 | [conn] 211 | (while (not (primary conn)) 212 | (Thread/sleep 1000))) 213 | 214 | (defn join! 215 | "Joins nodes into a replica set. Intended for use during setup." 216 | [test node] 217 | (let [port (if (config-server? test) 218 | client/config-port 219 | client/shard-port)] 220 | ; Wait for all nodes to be reachable 221 | (.close (client/await-open node port)) 222 | (jepsen/synchronize test 300) 223 | 224 | ; Start RS 225 | (when (= node (jepsen/primary test)) 226 | (with-open [conn (client/open node port)] 227 | (info "Initiating replica set on" node "\n" 228 | (with-out-str (pprint (target-replica-set-config test)))) 229 | (replica-set-initiate! conn 230 | (target-replica-set-config test)) 231 | 232 | (info "Waiting for cluster join") 233 | (await-join test conn) 234 | 235 | (info "Waiting for primary election") 236 | (await-primary conn) 237 | (info "Primary ready"))) 238 | 239 | ; For reasons I really don't understand, you have to prevent other nodes 240 | ; from checking the replset status until *after* we initiate the replset on 241 | ; the primary--so we insert a barrier here to make sure other nodes don't 242 | ; wait until primary initiation is complete. 243 | (jepsen/synchronize test 300) 244 | 245 | ; For other reasons I don't understand, you *have* to open a new set of 246 | ; connections after replset initation. I have a hunch that this happens 247 | ; because of a deadlock or something in mongodb itself, but it could also 248 | ; be a client connection-closing-detection bug. 249 | 250 | ; Amusingly, we can't just time out these operations; the client appears to 251 | ; swallow thread interrupts and keep on doing, well, something. FML. 252 | (with-open [conn (client/open node port)] 253 | (info "Waiting for cluster join") 254 | (await-join test conn) 255 | 256 | (info "Waiting for primary") 257 | (await-primary conn) 258 | 259 | (info "Primary is" (primary conn)) 260 | (jepsen/synchronize test 300)))) 261 | 262 | (defn replica-set-db 263 | "This database runs a single replica set." 264 | [] 265 | (reify 266 | Conn 267 | (host [_ test node] node) 268 | (port [_ test] (if (config-server? test) 269 | client/config-port 270 | client/shard-port)) 271 | 272 | db/DB 273 | (setup! [db test node] 274 | (install! test) 275 | (configure! test node) 276 | (start! test node) 277 | (join! test node)) 278 | 279 | (teardown! [db test node] 280 | (db/kill! db test node) 281 | (wipe! test node)) 282 | 283 | db/LogFiles 284 | (log-files [db test node] 285 | ; This might fail if the log file doesn't exist 286 | (c/su (meh (c/exec :chmod :a+r log-file))) 287 | {log-file "mongod.log"}) 288 | 289 | db/Process 290 | (start! [_ test node] 291 | (start! test node)) 292 | 293 | (kill! [_ test node] 294 | (c/su (cu/grepkill! :mongod)) 295 | (stop! test node)) 296 | 297 | db/Pause 298 | (pause! [_ test node] 299 | (c/su (cu/grepkill! :stop :mongod))) 300 | 301 | (resume! [_ test node] 302 | (c/su (cu/grepkill! :cont :mongod))) 303 | 304 | db/Primary 305 | (setup-primary! [_ test node]) 306 | 307 | (primaries [this test] 308 | (try (->> (:nodes test) 309 | (real-pmap (fn [node] 310 | (with-open [conn (client/open 311 | node 312 | (port this test))] 313 | ; Huh, sometimes Mongodb DOES return multiple 314 | ; primaries from a single request. Weeeeird. 315 | (primaries conn)))) 316 | (reduce concat) 317 | distinct) 318 | (catch Exception e 319 | (info e "Can't determine current primaries") 320 | nil))))) 321 | 322 | ;; Sharding 323 | 324 | (defn shard-node-plan 325 | "Takes a test, and produces a map of shard names to lists of nodes 326 | which form the replica set for that set. We always generate a config replica 327 | set, and fill remaining nodes with shards. 328 | 329 | {\"config\" [\"n1\" \"n2\" ...] 330 | \"shard1\" [\"n4\" ...] 331 | \"shard2\" [\"n7\" ...]}" 332 | [test] 333 | (let [n (:nodes test) 334 | shard-size 3] 335 | (assert (< (* 2 shard-size) (count n)) 336 | (str "Need at least " (* 2 shard-size) " nodes for 1 shard")) 337 | (zipmap (->> (range) (map inc) (map (partial str "shard")) (cons "config")) 338 | (partition-all shard-size n)))) 339 | 340 | (defn test-for-shard 341 | "Takes a test map and a shard map, and creates a version of the test map with 342 | the replica set name and nodes based on the given shard. 343 | 344 | (test-for-shard test {:nodes [...})" 345 | [test shard] 346 | (assoc test 347 | :nodes (:nodes shard) 348 | :replica-set-name (str "rs_" (:name shard)))) 349 | 350 | (defn shard-for-node 351 | "Takes a sharded DB and a node; returns the shard this node belongs to." 352 | [sharded-db node] 353 | (first (filter (fn [shard] (some #{node} (:nodes shard))) 354 | (:shards sharded-db)))) 355 | 356 | (defn on-shards 357 | "Takes a sharded DB. Calls (f shard) in parallel on each 358 | shard. Returns a map of shard names to the results of f on that shard." 359 | [sharded-db f] 360 | (zipmap (map :name (:shards sharded-db)) 361 | (real-pmap f (:shards sharded-db)))) 362 | 363 | (defn on-shards-nodes 364 | "Takes a sharded DB. Calls (f shard node) in parallel on each shard and node. 365 | Returns a map of shards to nodes to the results of f on that shard and node." 366 | [sharded-db f] 367 | (on-shards (fn [shard] 368 | (zipmap (:nodes shard) 369 | (real-pmap (partial f shard) (:nodes shard)))))) 370 | 371 | (defn configure-mongos! 372 | "Sets up mongos configuration file." 373 | [test node config-db] 374 | (c/su 375 | (c/exec :echo :> "/etc/mongos.conf" 376 | (-> (slurp (io/resource "mongos.conf")) 377 | (str/replace "%IP%" (cn/ip node)) 378 | (str/replace "%CONFIG_DB%" config-db))))) 379 | 380 | (defn start-mongos! 381 | "Starts the mongos daemon on the local node." 382 | [test node] 383 | (c/su 384 | (c/exec :mkdir :-p mongos-dir) 385 | (cu/start-daemon! 386 | {:logfile mongos-log-file 387 | :pidfile mongos-pid-file 388 | :chdir mongos-dir} 389 | (str "/usr/bin/" mongos-bin) 390 | :--config "/etc/mongos.conf"))) 391 | 392 | (defn stop-mongos! 393 | "Stops the mongos daemon on the local node." 394 | [test node] 395 | (c/su (cu/stop-daemon! mongos-bin mongos-pid-file))) 396 | 397 | (defn add-shards! 398 | "Adds the initial set of shards for the DB setup." 399 | [node shard-strs] 400 | (with-open [conn (client/open node client/mongos-port)] 401 | (doseq [shard shard-strs] 402 | (info "Adding shard" shard) 403 | (client/admin-command! conn {:addShard shard})))) 404 | 405 | (defrecord Mongos [config-str shard-strs] 406 | db/DB 407 | (setup! [this test node] 408 | (install! test) 409 | (configure-mongos! test node config-str) 410 | (start-mongos! test node) 411 | (info "Waiting for mongos to start") 412 | (client/await-open node client/mongos-port) 413 | (jepsen/synchronize test) 414 | (when (= (jepsen/primary test) node) 415 | (add-shards! node shard-strs))) 416 | 417 | (teardown! [this test node] 418 | (stop-mongos! test node) 419 | (c/su 420 | (c/exec :rm :-rf mongos-log-file mongos-dir))) 421 | 422 | db/LogFiles 423 | (log-files [this test node] 424 | ; This might fail if the log file doesn't exist 425 | (c/su (meh (c/exec :chmod :a+r mongos-log-file))) 426 | {mongos-log-file "mongos.log"})) 427 | 428 | (defrecord ShardedDB [mongos shards tcpdump] 429 | Conn 430 | (host [this test node] node) 431 | (port [this test] client/mongos-port) 432 | 433 | db/DB 434 | (setup! [this test node] 435 | ;(db/setup! tcpdump test node) 436 | (let [shard (shard-for-node this node)] 437 | (info "Setting up shard" shard) 438 | (db/setup! (:db shard) (test-for-shard test shard) node)) 439 | 440 | (db/setup! mongos test node)) 441 | 442 | (teardown! [this test node] 443 | (db/teardown! mongos test node) 444 | (let [shard (shard-for-node this node)] 445 | (info "Tearing down shard" shard) 446 | (db/teardown! (:db shard) (test-for-shard test shard) node)) 447 | ;(db/teardown! tcpdump test node) 448 | ) 449 | 450 | db/LogFiles 451 | (log-files [this test node] 452 | (merge ;(db/log-files tcpdump test node) 453 | (db/log-files mongos test node) 454 | (let [shard (shard-for-node this node)] 455 | (db/log-files (:db shard) (test-for-shard test shard) node)))) 456 | 457 | db/Primary 458 | (setup-primary! [_ test node] nil) 459 | (primaries [this test] 460 | (->> (on-shards this 461 | (fn [shard] 462 | (db/primaries (:db shard) 463 | (test-for-shard test shard)))) 464 | vals 465 | (reduce concat) 466 | distinct)) 467 | 468 | db/Process 469 | (start! [this test node] 470 | (let [shard (shard-for-node this node)] 471 | (db/start! (:db shard) (test-for-shard test shard) node))) 472 | 473 | (kill! [this test node] 474 | (let [shard (shard-for-node this node)] 475 | (db/kill! (:db shard) (test-for-shard test shard) node))) 476 | 477 | db/Pause 478 | (pause! [this test node] 479 | (let [shard (shard-for-node this node)] 480 | (db/pause! (:db shard) (test-for-shard test shard) node))) 481 | 482 | (resume! [this test node] 483 | (let [shard (shard-for-node this node)] 484 | (db/resume! (:db shard) (test-for-shard test shard) node)))) 485 | 486 | (defn sharded-db 487 | "This database deploys a config server replica set, shard replica sets, and 488 | mongos sharding servers." 489 | [opts] 490 | (let [plan (shard-node-plan opts)] 491 | (ShardedDB. 492 | (Mongos. 493 | ; Config server 494 | (->> (get plan "config") 495 | (map #(str % ":" client/config-port)) 496 | (str/join ",") 497 | (str "rs_config/")) 498 | ; Shards 499 | (->> plan 500 | (keep (fn [[rs nodes]] 501 | (when-not (= "config" rs) 502 | (str "rs_" rs "/" 503 | (first nodes) ":" client/shard-port)))))) 504 | (->> plan 505 | (map (fn [[shard-name nodes]] 506 | {:name shard-name 507 | :nodes nodes 508 | :db (replica-set-db)}))) 509 | 510 | (db/tcpdump {:filter "host 192.168.122.1" 511 | :ports [client/mongos-port]})))) 512 | 513 | (defrecord LazyFSDB [lazyfs mongodb] 514 | Conn 515 | (host [_ test node] (host mongodb test node)) 516 | (port [_ test] 517 | (port mongodb test)) 518 | 519 | db/DB 520 | (setup! [_ test node] 521 | (install! test) ; Gives us the user so we can create the directory structure 522 | (db/setup! lazyfs test node) 523 | (db/setup! mongodb test node)) 524 | 525 | (teardown! [_ test node] 526 | (let [running (try+ (c/exec :pgrep :lazyfs) 527 | (catch [:exit 1] _ "not running"))] 528 | (info "LazyFS is" running)) 529 | (try (db/teardown! mongodb test node) 530 | (finally 531 | (db/teardown! lazyfs test node)))) 532 | 533 | db/Primary 534 | (setup-primary! [_ test node] 535 | (db/setup-primary! mongodb test node)) 536 | 537 | (primaries [this test] 538 | (db/primaries mongodb test)) 539 | 540 | db/LogFiles 541 | (log-files [_ test node] 542 | (merge (db/log-files mongodb test node) 543 | (db/log-files lazyfs test node))) 544 | 545 | db/Process 546 | (start! [_ test node] 547 | (db/start! mongodb test node)) 548 | 549 | (kill! [_ test node] 550 | (db/kill! mongodb test node) 551 | (lazyfs/lose-unfsynced-writes! lazyfs)) 552 | 553 | db/Pause 554 | (pause! [_ test node] 555 | (db/pause! mongodb test node)) 556 | 557 | (resume! [this test node] 558 | (db/resume! mongodb test node))) 559 | 560 | (defn lazyfs-db 561 | "Wraps another Mongo database, making sure that its data directory is a 562 | lazyfs mount." 563 | [mongodb] 564 | (LazyFSDB. (lazyfs/db {:dir data-dir 565 | :user user}) 566 | mongodb)) 567 | 568 | (defn db 569 | "Constructs a MongoDB DB based on CLI options. 570 | 571 | :lazyfs If set, mounts the data directory in a lazyfs, and causes 572 | process kills to wipe the page cache. 573 | :sharded If set, deploys a sharded cluster with a config replica set 574 | and n shards. 575 | :proxy If set, adds a proxy in front of the cluster. 576 | :local-proxy If set, adds a local proxy in front of the cluster." 577 | [opts] 578 | (cond-> (if (:sharded opts) 579 | (sharded-db opts) 580 | (replica-set-db)) 581 | (:lazyfs opts) lazyfs-db 582 | (:proxy opts) proxy/db 583 | (:local-proxy opts) local-proxy/db)) 584 | -------------------------------------------------------------------------------- /src/jepsen/mongodb/client.clj: -------------------------------------------------------------------------------- 1 | (ns jepsen.mongodb.client 2 | "Wraps the MongoDB Java client." 3 | (:require [clojure.walk :as walk] 4 | [clojure.tools.logging :refer [info warn]] 5 | [dom-top.core :refer [assert+]] 6 | [jepsen [util :as util :refer [timeout]]] 7 | [slingshot.slingshot :refer [try+ throw+]] 8 | [wall.hack :as hack]) 9 | (:import (java.io Closeable) 10 | (java.util ArrayList 11 | List) 12 | (java.util.concurrent TimeUnit) 13 | (com.mongodb Block 14 | ConnectionString 15 | MongoClientException 16 | MongoClientSettings 17 | MongoClientSettings$Builder 18 | MongoConnectionPoolClearedException 19 | MongoQueryException 20 | MongoSocketReadException 21 | MongoSocketReadTimeoutException 22 | MongoTimeoutException 23 | MongoWriteConcernException 24 | ReadConcern 25 | ReadPreference 26 | ServerAddress 27 | TransactionOptions 28 | WriteConcern) 29 | (com.mongodb.client ClientSession 30 | MongoClient 31 | MongoClients 32 | MongoCollection 33 | MongoDatabase 34 | TransactionBody) 35 | (com.mongodb.client.internal ClientSessionImpl 36 | ClientSessionImpl$TransactionState) 37 | (com.mongodb.client.model Filters 38 | FindOneAndUpdateOptions 39 | ReplaceOptions 40 | ReturnDocument 41 | Sorts 42 | Updates 43 | UpdateOptions) 44 | (com.mongodb.client.result UpdateResult) 45 | (com.mongodb.internal.connection 46 | MongoWriteConcernWithResponseException) 47 | (com.mongodb.internal.session BaseClientSessionImpl 48 | ServerSessionPool 49 | ServerSessionPool$ServerSessionImpl) 50 | (com.mongodb.session ServerSession) 51 | (org.bson BsonBinary 52 | Document))) 53 | 54 | (def mongos-port 27017) 55 | (def shard-port 27018) 56 | (def config-port 27019) 57 | (def proxy-port 7901) 58 | 59 | (defprotocol Conn 60 | "We're juggling mongod, mongos, and possibly a proxy server in front of that. 61 | This protocol lets us tell what host and port clients should use to connect 62 | to something. We implement this on each DB." 63 | (host [this test node] "What host should we use to connect to this DB?") 64 | (port [this test] "What port should we use to connect to this DB?")) 65 | 66 | (defn close! 67 | "Closes any Closeable." 68 | [^Closeable c] 69 | (.close c)) 70 | 71 | ;; Basic node manipulation 72 | (defn addr->node 73 | "Takes a node address like n1:27017 and returns just n1" 74 | [addr] 75 | ((re-find #"(.+):\d+" addr) 1)) 76 | 77 | (defmacro with-block 78 | "Wrapper for the functional mongo Block interface" 79 | [x & body] 80 | `(reify Block 81 | (apply [_ ~x] 82 | ~@body))) 83 | 84 | ;; Connection management 85 | (defn config-server? 86 | "Takes a test map, and returns true iff this set of nodes is intended to be a 87 | configsvr--e.g. if it has a :replica-set-name of 'rs_config'." 88 | [test] 89 | (= (:replica-set-name test) "rs_config")) 90 | 91 | (defn ^MongoClient open 92 | "Opens a connection to a node. Second arg can be either a test map (in which 93 | case the port is derived from (port (:db test)) or an integer (in which case 94 | it's used directly." 95 | [node test-or-port] 96 | (let [host (if (integer? test-or-port) 97 | node 98 | (host (:db test-or-port) test-or-port node)) 99 | port (if (integer? test-or-port) 100 | test-or-port 101 | (port (:db test-or-port) test-or-port)) 102 | test (if (integer? test-or-port) {} test-or-port)] 103 | ;(info "Connecting to" (str host ":" port)) 104 | (MongoClients/create 105 | (cond-> (.. (MongoClientSettings/builder) 106 | (applyToClusterSettings 107 | (with-block builder 108 | (.. builder 109 | (hosts [(ServerAddress. host port)]) 110 | (serverSelectionTimeout 1 TimeUnit/SECONDS)))) 111 | (applyToSocketSettings 112 | (with-block builder 113 | (.. builder 114 | (connectTimeout 5 TimeUnit/SECONDS) 115 | (readTimeout 5 TimeUnit/SECONDS)))) 116 | (applyToConnectionPoolSettings 117 | (with-block builder 118 | (.. builder 119 | (minSize 1) 120 | (maxSize 1) 121 | (maxWaitTime 1 TimeUnit/SECONDS))))) 122 | 123 | (boolean? (:retry-writes test)) 124 | (.retryWrites (:retry-writes test)) 125 | 126 | true (.build))))) 127 | 128 | (declare ping) 129 | 130 | (defn ^MongoClient await-open* 131 | "Blocks until (open node) succeeds, and optionally pings." 132 | [node test-or-port ping?] 133 | (let [port (if (integer? test-or-port) 134 | test-or-port 135 | (port (:db test-or-port) test-or-port))] 136 | (assert+ (integer? port) {:type ::not-integer-port 137 | :db (when-not (integer? test-or-port) 138 | (:db test-or-port)) 139 | :port port}) 140 | (util/await-fn 141 | (fn conn [] 142 | (try+ 143 | (let [conn (open node test-or-port)] 144 | (try 145 | (when ping? 146 | ;(.first (.listDatabaseNames conn)) 147 | (ping conn)) 148 | conn 149 | ; Don't leak clients when they fail 150 | (catch Throwable t 151 | (.close conn) 152 | (throw t)))) 153 | (catch com.mongodb.MongoTimeoutException e 154 | (info "Mongo timeout while waiting for conn; retrying." 155 | (.getMessage e)) 156 | (throw+ {:type ::timed-out-awaiting-connection 157 | :node node 158 | :port port})) 159 | (catch com.mongodb.MongoNodeIsRecoveringException e 160 | (info "Node is recovering; retrying." (.getMessage e)) 161 | (throw+ {:type :node-recovering-awaiting-connection 162 | :node node 163 | :port port})) 164 | (catch com.mongodb.MongoSocketReadTimeoutException e 165 | (info "Mongo socket read timeout waiting for conn; retrying") 166 | (throw+ {:type :mongo-read-timeout-awaiting-connection 167 | :node node 168 | :port port})))) 169 | {:retry-interval 1000 170 | :log-interval 10000 171 | :log-message (str "Waiting for " node ":" port " to be available") 172 | :timeout 300000}))) 173 | 174 | (defn ^MongoClient await-open 175 | "Blocks until (open node) succeeds and the server responds to ping. Helpful 176 | for initial cluster setup." 177 | [node port] 178 | (await-open* node port true)) 179 | 180 | ; Basic plumbing 181 | (defprotocol ToDoc 182 | "Supports coercion to MongoDB BSON Documents." 183 | (->doc [x])) 184 | 185 | (extend-protocol ToDoc 186 | nil 187 | (->doc [_] (Document.)) 188 | 189 | clojure.lang.Keyword 190 | (->doc [x] (name x)) 191 | 192 | clojure.lang.IPersistentMap 193 | (->doc [x] 194 | (->> x 195 | (map (fn [[k v]] [(name k) (->doc v)])) 196 | (into {}) 197 | (Document.))) 198 | 199 | clojure.lang.Sequential 200 | (->doc [x] 201 | (ArrayList. (map ->doc x))) 202 | 203 | Object 204 | (->doc [x] x)) 205 | 206 | (defprotocol FromDoc 207 | "Supports coercion from MongoDB BSON Documents" 208 | (parse [x])) 209 | 210 | (extend-protocol FromDoc 211 | nil 212 | (parse [x] nil) 213 | 214 | Document 215 | (parse [x] 216 | (persistent! 217 | (reduce (fn [m [k v]] 218 | (assoc! m (keyword k) (parse v))) 219 | (transient {}) 220 | (.entrySet x)))) 221 | 222 | UpdateResult 223 | (parse [r] 224 | {:matched-count (.getMatchedCount r) 225 | :modified-count (.getModifiedCount r) 226 | :upserted-id (.getUpsertedId r) 227 | :acknowledged? (.wasAcknowledged r)}) 228 | 229 | List 230 | (parse [x] 231 | (map parse x)) 232 | 233 | Object 234 | (parse [x] 235 | x)) 236 | 237 | ;; Write Concerns 238 | (defn write-concern 239 | "Turns a named (e.g. :majority, \"majority\") into a WriteConcern. Integer 240 | strings like \"2\" are converted to a WriteConcern as well. Optionally takes 241 | a journal option, which can be true (forces journaling), false (disables 242 | journaling), 243 | or nil (leaves as default)." 244 | ([wc] 245 | (write-concern wc nil)) 246 | ([wc j] 247 | (let [wc (when wc 248 | (cond-> (case (name wc) 249 | "acknowledged" WriteConcern/ACKNOWLEDGED 250 | "journaled" WriteConcern/JOURNALED 251 | "majority" WriteConcern/MAJORITY 252 | "unacknowledged" WriteConcern/UNACKNOWLEDGED 253 | (WriteConcern. (Integer/parseInt wc))) 254 | (not (nil? j)) (.withJournal j)))] 255 | wc))) 256 | 257 | (defn read-concern 258 | "Turns a named (e.g. :majority, \"majority\" into a ReadConcern." 259 | [rc] 260 | (when rc 261 | (case (name rc) 262 | "available" ReadConcern/AVAILABLE 263 | "default" ReadConcern/DEFAULT 264 | "linearizable" ReadConcern/LINEARIZABLE 265 | "local" ReadConcern/LOCAL 266 | "majority" ReadConcern/MAJORITY 267 | "snapshot" ReadConcern/SNAPSHOT 268 | (ReadConcern. (Integer/parseInt rc))))) 269 | 270 | (defn transactionless-read-concern 271 | "Read concern SNAPSHOT isn't supported outside transactions; we weaken it to 272 | MAJORITY." 273 | [rc] 274 | (case rc 275 | "snapshot" "majority" 276 | rc)) 277 | 278 | (defn ^ReadPreference read-preference 279 | "Turns a string or keyword read preference into a Mongo ReadPreference. nil 280 | is passed through." 281 | [pref] 282 | (when pref 283 | (ReadPreference/valueOf (name pref)))) 284 | 285 | ;; Error handling 286 | (defmacro with-errors 287 | "Remaps common errors; takes an operation and returns a :fail or :info op 288 | when a throw occurs in body." 289 | [op & body] 290 | `(try ~@body 291 | (catch MongoConnectionPoolClearedException e# 292 | (assoc ~op :type :fail, :error [:connection-pool-cleared 293 | (.getMessage e#)])) 294 | 295 | (catch com.mongodb.MongoNotPrimaryException e# 296 | (assoc ~op :type :fail, :error :not-primary)) 297 | 298 | (catch com.mongodb.MongoNodeIsRecoveringException e# 299 | (assoc ~op :type :fail, :error :node-recovering)) 300 | 301 | (catch MongoSocketReadException e# 302 | (assoc ~op :type :info, :error [:socket-read-exception 303 | (.getMessage e#)])) 304 | 305 | (catch MongoSocketReadTimeoutException e# 306 | (assoc ~op :type :info, :error :socket-read-timeout)) 307 | 308 | (catch com.mongodb.MongoTimeoutException e# 309 | (condp re-find (.getMessage e#) 310 | #"Timed out after \d+ ms while waiting to connect" 311 | (assoc ~op :type :fail, :error :connect-timeout) 312 | 313 | ; What was this message? 314 | ;(assoc ~op :type :info, :error :mongo-timeout) 315 | 316 | (throw e#))) 317 | 318 | (catch com.mongodb.MongoExecutionTimeoutException e# 319 | (assoc ~op :type :info, :error :mongo-execution-timeout)) 320 | 321 | (catch com.mongodb.MongoWriteException e# 322 | (condp re-find (.getMessage e#) 323 | #"Not primary so we cannot begin or continue a transaction" 324 | (assoc ~op :type :fail, :error :not-primary-cannot-txn) 325 | 326 | ; This LOOKS like it ought to be a definite failure, but it's not! 327 | ; Write transactions can throw this but actually succeed. I'm calling 328 | ; it info for now. 329 | #"Could not find host matching read preference" 330 | (assoc ~op :type :info, :error :no-host-matching-read-preference) 331 | 332 | (throw e#))) 333 | 334 | (catch MongoWriteConcernException e# 335 | (condp re-find (.getMessage e#) 336 | #"operation was interrupted" 337 | (assoc ~op :type :fail, :error :write-concern-interrupted) 338 | 339 | #"Primary stepped down while waiting for replication" 340 | (assoc ~op :type :fail, :error :primary-stepped-down-waiting-for-replication) 341 | 342 | (throw e#))) 343 | 344 | (catch com.mongodb.MongoCommandException e# 345 | (condp re-find (.getMessage e#) 346 | #"error 133 " 347 | (assoc ~op :type :fail, :error [:failed-to-satisfy-read-preference 348 | (.getMessage e#)]) 349 | 350 | #"WriteConflict" 351 | (assoc ~op :type :fail, :error :write-conflict) 352 | 353 | ; Huh, this is NOT, as it turns out, a determinate failure. 354 | #"TransactionCoordinatorSteppingDown" 355 | (assoc ~op :type :info, :error :transaction-coordinator-stepping-down) 356 | 357 | ; This can be the underlying cause of issues like "unable to 358 | ; initialize targeter for write op for collection..." 359 | ; These are ALSO apparently not... determinate failures? 360 | #"Connection refused" 361 | (assoc ~op :type :info, :error :connection-refused) 362 | 363 | ; Likewise 364 | #"Connection reset by peer" 365 | (assoc ~op :type :info, :error :connection-reset-by-peer) 366 | 367 | (throw e#))) 368 | 369 | (catch MongoClientException e# 370 | (condp re-find (.getMessage e#) 371 | ; This... seems like a bug too 372 | ; Can also happen when connecting to a hidden replica 373 | #"Sessions are not supported by the MongoDB cluster to which this client is connected" 374 | (do (Thread/sleep 5000) 375 | (assoc ~op :type :fail, :error :sessions-not-supported-by-cluster)) 376 | 377 | (throw e#))) 378 | 379 | (catch MongoQueryException e# 380 | (condp re-find (.getMessage e#) 381 | #"Could not find host matching read preference" 382 | (assoc ~op :type :fail, :error :no-host-matching-read-preference) 383 | 384 | #"code 251 " (assoc ~op :type :fail, :error :transaction-aborted) 385 | 386 | #"code 133 " (assoc ~op :type :fail, 387 | :error [:failed-to-satisfy-read-preference 388 | (.getMessage e#)]) 389 | 390 | ; Why are there two ways to report this? 391 | #"code 10107 " (assoc ~op :type :fail, :error :not-primary-2) 392 | 393 | #"code 11602 " (assoc ~op :type :info, :error :interrupted-due-to-repl-state-change) 394 | 395 | #"code 13436 " (assoc ~op :type :fail, :error :not-primary-or-recovering) 396 | (throw e#))) 397 | 398 | (catch MongoTimeoutException e# 399 | (condp re-find (.getMessage e#) 400 | ; If we timed out before getting a connection, we clearly can't have 401 | ; done anything. 402 | #"while waiting for a connection to server" 403 | (assoc ~op :type :fail, :error :timeout-waiting-for-connection) 404 | 405 | (throw e#))) 406 | 407 | (catch MongoWriteConcernWithResponseException e# 408 | (condp re-find (.getMessage e#) 409 | ; Not really clear whether this should be a definite failure or not, 410 | ; but writes that fail with this kind of error DO succeed on occasion, 411 | ; so let's call it info. 412 | #"InterruptedDueToReplStateChange" 413 | (assoc ~op :type :info, :error :interrupted-due-to-repl-state-change) 414 | (throw e#))) 415 | )) 416 | 417 | (defn ^MongoDatabase db 418 | "Get a DB from a connection. Options may include 419 | 420 | :write-concern e.g. :majority 421 | :read-concern e.g. :local 422 | :read-preference e.g. :secondary 423 | :journal If present, forces journaling to be enabled or disabled 424 | for write concerns." 425 | ([conn db-name] 426 | (.getDatabase conn db-name)) 427 | ([conn db-name opts] 428 | (let [rc (read-concern (:read-concern opts)) 429 | rp (read-preference (:read-preference opts)) 430 | wc (write-concern (:write-concern opts) (:journal opts))] 431 | (cond-> (db conn db-name) 432 | rc (.withReadConcern rc) 433 | rp (.withReadPreference rp) 434 | wc (.withWriteConcern wc))))) 435 | 436 | (defn ^MongoCollection collection 437 | "Gets a Mongo collection from a DB." 438 | [^MongoDatabase db collection-name] 439 | (.getCollection db collection-name)) 440 | 441 | (defn create-collection! 442 | [^MongoDatabase db collection-name] 443 | (.createCollection db collection-name)) 444 | 445 | ;; Sessions 446 | 447 | (defn ^ClientSession start-session 448 | "Starts a new session" 449 | [conn] 450 | (.startSession conn)) 451 | 452 | ; The astute reader may ask: just *why* are we so pre-occupied with hacking our 453 | ; way into the guts of client & server session state and making one session 454 | ; look like another? Because, dearest reader, we intend to do something 455 | ; terrible but apparently not forbidden by the spec, and split a single 456 | ; transaction across *multiple* nodes with independent clients. 457 | 458 | (defn ^ServerSession get-server-session 459 | "Takes a client session and extracts its corresponding server session." 460 | [^ClientSession client-session] 461 | (.getServerSession client-session)) 462 | 463 | (defn ^BaseClientSessionImpl set-server-session! 464 | "Takes a client session and sets its server session. Returns client session." 465 | [^BaseClientSessionImpl client-session ^ServerSession server-session] 466 | (let [field (.getDeclaredField BaseClientSessionImpl "serverSession")] 467 | (.setAccessible field true) 468 | (.set field client-session server-session)) 469 | client-session) 470 | 471 | (defn ^ClientSessionImpl$TransactionState txn-state 472 | "Retrieves the txn state from a client session." 473 | [^ClientSessionImpl session] 474 | (let [field (.getDeclaredField ClientSessionImpl "transactionState")] 475 | (.setAccessible field true) 476 | (.get field session))) 477 | 478 | (defn ^ClientSessionImpl set-txn-state! 479 | "Sets the transactionState field of a client session, and returns it." 480 | [^ClientSessionImpl session ^ClientSessionImpl$TransactionState state] 481 | (let [field (.getDeclaredField ClientSessionImpl "transactionState")] 482 | (.setAccessible field true) 483 | (.set field session state)) 484 | session) 485 | 486 | (defn ^TransactionOptions txn-opts 487 | "Retrieves the transaction options from a client session." 488 | [^ClientSessionImpl session] 489 | (let [field (.getDeclaredField ClientSessionImpl "transactionOptions")] 490 | (.setAccessible field true) 491 | (.get field session))) 492 | 493 | (defn ^ClientSessionImpl set-txn-opts! 494 | "Sets the transaction options field of a client session, and returns it." 495 | [^ClientSessionImpl session ^TransactionOptions opts] 496 | (let [field (.getDeclaredField ClientSessionImpl "transactionOptions")] 497 | (.setAccessible field true) 498 | (.set field session opts)) 499 | session) 500 | 501 | (defn ^ServerSessionPool$ServerSessionImpl 502 | set-server-session-impl-transaction-number! 503 | "Override a server session's transaction number. Returns the session." 504 | [^ServerSessionPool$ServerSessionImpl session ^long txn-no] 505 | (let [field (.getDeclaredField ServerSessionPool$ServerSessionImpl 506 | "transactionNumber")] 507 | (.setAccessible field true) 508 | (.setLong field session txn-no)) 509 | session) 510 | 511 | (defn ^ServerSessionPool server-session->server-session-pool 512 | "Extracts the server session pool from a ServerSessionImpl. This is stored in 513 | the implicit instance variable this$0." 514 | [^ServerSessionPool$ServerSessionImpl session] 515 | (let [field (.getDeclaredField ServerSessionPool$ServerSessionImpl "this$0")] 516 | (.setAccessible field true) 517 | (.get field session))) 518 | 519 | (defn clone-server-session 520 | "Takes a server session and returns a copy of it with the same session 521 | identifier and transaction number." 522 | [^ServerSessionPool$ServerSessionImpl session] 523 | (let [; First, extract the session identifier from the original session 524 | session-id (.getIdentifier session) 525 | identifier (.get session-id "id") 526 | ; And the transaction number. 527 | txn-no (.getTransactionNumber session) 528 | ; We also need the server session pool so we can make a new session 529 | pool (server-session->server-session-pool session) 530 | ; Now construct a fresh session 531 | constructor (first (.getDeclaredConstructors 532 | ServerSessionPool$ServerSessionImpl)) 533 | _ (.setAccessible constructor true) 534 | session' (-> constructor 535 | (.newInstance (into-array Object [pool identifier])) 536 | (set-server-session-impl-transaction-number! txn-no))] 537 | session')) 538 | 539 | (defmacro with-session-like 540 | "Takes a vector of a target client session and a source client session. 541 | Within body, client session will have its session ID, txn number, transaction 542 | state, and transaction options, overridden to look like the source session, 543 | then reset at the end of the body." 544 | [[target source] & body] 545 | `(let [server-session# (get-server-session ~target) 546 | server-session'# (clone-server-session (get-server-session ~source)) 547 | txn-state# (txn-state ~target) 548 | txn-state'# (txn-state ~source) 549 | txn-opts# (txn-opts ~target) 550 | txn-opts'# (txn-opts ~source)] 551 | (set-server-session! ~target server-session'#) 552 | (set-txn-state! ~target txn-state'#) 553 | (set-txn-opts! ~target txn-opts'#) 554 | (try ~@body 555 | (finally 556 | (set-server-session! ~target server-session#) 557 | (set-txn-state! ~target txn-state#) 558 | (set-txn-opts! ~target txn-opts#))))) 559 | 560 | (defn notify-message-sent! 561 | "Notifies a session that a message was sent in this transaction. We have to 562 | force this when we're threading transactions across multiple sessions." 563 | [^ClientSessionImpl session] 564 | (.notifyMessageSent session)) 565 | 566 | ;; Transactions 567 | 568 | (defmacro txn 569 | "Converts body to a TransactionBody function." 570 | [& body] 571 | `(reify TransactionBody 572 | (execute [this] 573 | ~@body))) 574 | 575 | (defn start-txn! 576 | "Starts a txn on a session with the given transaction options." 577 | [^ClientSession session ^TransactionOptions opts] 578 | (.startTransaction session opts)) 579 | 580 | (defn commit-txn! 581 | [^ClientSession session] 582 | (.commitTransaction session)) 583 | 584 | ;; Actual commands 585 | 586 | (defn command! 587 | "Runs a command on the given db." 588 | [^MongoDatabase db cmd] 589 | (parse (.runCommand db (->doc cmd)))) 590 | 591 | (defn admin-command! 592 | "Runs a command on the admin database." 593 | [conn cmd] 594 | (command! (db conn "admin") cmd)) 595 | 596 | (defn ping 597 | "Pings the server with a default database." 598 | [conn] 599 | (admin-command! conn {:ping 1})) 600 | 601 | (defn find-one 602 | "Find a document by ID. If a session is provided, will use that session 603 | for a causally consistent read" 604 | ([coll id] 605 | (find-one coll nil id)) 606 | ([^MongoCollection coll ^ClientSession session id] 607 | (let [filt (Filters/eq "_id" id)] 608 | (-> (if session 609 | (.find coll session filt) 610 | (.find coll filt)) 611 | .first 612 | parse)))) 613 | 614 | (defn upsert! 615 | "Ensures the existence of the given document, a map with at minimum an :_id 616 | key." 617 | ([^MongoCollection coll doc] 618 | (upsert! nil coll doc)) 619 | ([^ClientSession session ^MongoCollection coll doc] 620 | (assert (:_id doc)) 621 | (parse 622 | (if session 623 | (.replaceOne coll 624 | session 625 | (Filters/eq "_id" (:_id doc)) 626 | (->doc doc) 627 | (.upsert (ReplaceOptions.) true)) 628 | (.replaceOne coll 629 | (Filters/eq "_id" (:_id doc)) 630 | (->doc doc) 631 | (.upsert (ReplaceOptions.) true)))))) 632 | --------------------------------------------------------------------------------