├── vagrant
│   ├── m202-ubuntu1404
│   │   ├── .gitignore
│   │   ├── README.md
│   │   ├── provisioners
│   │   │   └── setup.sh
│   │   └── Vagrantfile
│   └── MongoDBU
│       ├── README.md
│       ├── .vagrant
│       │   └── machines
│       │       └── default
│       │           └── virtualbox
│       │               ├── action_set_name
│       │               ├── id
│       │               ├── index_uuid
│       │               ├── action_provision
│       │               └── synced_folders
│       ├── provisioners
│       │   └── setup.sh
│       └── Vagrantfile
├── README.md
├── ChunkInfo.js
├── LICENSE.md
├── compactness.js
├── mongostat-demangler.sh
├── AllChunkInfo.js
├── two_shards_1m_docs.js
├── pre_alloc.bash
└── crud.js

/vagrant/m202-ubuntu1404/.gitignore:
--------------------------------------------------------------------------------
/.vagrant
--------------------------------------------------------------------------------
/vagrant/MongoDBU/README.md:
--------------------------------------------------------------------------------
MongoDBU_vm
===========
--------------------------------------------------------------------------------
/vagrant/MongoDBU/.vagrant/machines/default/virtualbox/action_set_name:
--------------------------------------------------------------------------------
1405100085
--------------------------------------------------------------------------------
/vagrant/MongoDBU/.vagrant/machines/default/virtualbox/id:
--------------------------------------------------------------------------------
336fe158-7832-4574-bb55-fd1ba31860f4
--------------------------------------------------------------------------------
/vagrant/MongoDBU/.vagrant/machines/default/virtualbox/index_uuid:
--------------------------------------------------------------------------------
beba46c3c19740a8aca03c5f1244fd9d
--------------------------------------------------------------------------------
/vagrant/MongoDBU/.vagrant/machines/default/virtualbox/action_provision:
--------------------------------------------------------------------------------
1.5:336fe158-7832-4574-bb55-fd1ba31860f4
--------------------------------------------------------------------------------
/vagrant/MongoDBU/.vagrant/machines/default/virtualbox/synced_folders:
--------------------------------------------------------------------------------
{"virtualbox":{"/vagrant":{"guestpath":"/vagrant","hostpath":"/Users/adam/git/mongodb-scripts/vagrant/MongoDBU"}}}
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
mongodb-scripts
===============

These are just some scripts and recipes I find useful when administering MongoDB.

They are not intended to be fully fledged tools, and will occasionally need some tweaking to run in particular environments, but I have found them quite useful in the past.
--------------------------------------------------------------------------------
/vagrant/m202-ubuntu1404/README.md:
--------------------------------------------------------------------------------
M202 Virtual Machine (Ubuntu 14.04.1 LTS)
=========================================

This is the vagrant folder for the M202 Ubuntu 14.04 virtual machine.
It will provision a machine with the following characteristics:

* 2GB RAM
* 8GB Disk (dynamically allocated)
* 1 VCPU
* NAT Networking
* Standard Vagrant port forwarding
* Default login/pass of m202/m202 (sudo is available without a password)
--------------------------------------------------------------------------------
/vagrant/MongoDBU/provisioners/setup.sh:
--------------------------------------------------------------------------------
#!/bin/sh

set -e

HOME="/home/vagrant"
COURSE="m202"
MONGOPROC="$HOME/$COURSE/mongoProc"

echo 'Setting up VM...'

echo 'Updating system... this may take a while'
apt-get -y update > /dev/null 2>&1
DEBIAN_FRONTEND=noninteractive apt-get -y -o Dpkg::Options::="--force-confdef" -o Dpkg::Options::="--force-confold" upgrade > /dev/null 2>&1

echo 'Done! Rebooting...'

reboot
--------------------------------------------------------------------------------
/vagrant/m202-ubuntu1404/provisioners/setup.sh:
--------------------------------------------------------------------------------
#!/bin/sh

#set -e

#HOME="/home/m202"
#COURSE="m202"
#MONGOPROC="$HOME/mongoProc"

#echo 'Setting up VM...'

#echo 'Updating system... this may take a while'
#apt-get -y update > /dev/null 2>&1
#DEBIAN_FRONTEND=noninteractive apt-get -y -o Dpkg::Options::="--force-confdef" -o Dpkg::Options::="--force-confold" upgrade > /dev/null 2>&1

#echo 'Done! Rebooting...'

#reboot
--------------------------------------------------------------------------------
/ChunkInfo.js:
--------------------------------------------------------------------------------
// This is a very simple function, which takes three arguments:
// ns: a string representing the sharded namespace to be examined
// id: the chunk ID (the _id field from config.chunks) for the chunk you want information on
// est: a boolean to determine whether or not to use the estimate option (recommended generally)

// It is called from the mongos like so:

// ChunkInfo("database.collection", "database.collection-_id_value", true);
// Output is printed to the shell; will add options for other output later

ChunkInfo = function(ns, id, est) {
    var configDB = db.getSiblingDB("config");
    var db1 = db.getSiblingDB(ns.split(".")[0]);
    var key = configDB.collections.findOne({_id : ns}).key;
    var chunk = configDB.chunks.find({"_id" : id}).limit(1).next();
    var dataSizeResult = db1.runCommand({datasize : chunk.ns, keyPattern : key, min : chunk.min, max : chunk.max, estimate : est});
    print("***********Chunk Information***********");
    printjson(chunk);
    print("Chunk Size: " + dataSizeResult.size);
    print("Objects in chunk: " + dataSizeResult.numObjects);
}
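
// A minimal usage sketch (the namespace "chunkTest.foo" is illustrative; pick
// a real chunk _id from config.chunks on your cluster):
//
//   load("ChunkInfo.js");
//   var someChunk = db.getSiblingDB("config").chunks.findOne({ns : "chunkTest.foo"});
//   ChunkInfo("chunkTest.foo", someChunk._id, true);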
--------------------------------------------------------------------------------
/LICENSE.md:
--------------------------------------------------------------------------------
The MIT License (MIT)

Copyright (c) 2014 Adam Comerford

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
--------------------------------------------------------------------------------
/vagrant/MongoDBU/Vagrantfile:
--------------------------------------------------------------------------------
# -*- mode: ruby -*-
# vi: set ft=ruby :

# Vagrantfile API/syntax version. Don't touch unless you know what you're doing!
VAGRANTFILE_API_VERSION = "2"

Vagrant.configure(VAGRANTFILE_API_VERSION) do |config|

  config.vm.box = "MongoDBU_vm"
  config.vm.box_url = "http://mekhar/mongodbu.box"

  config.vm.provision "shell", path: "provisioners/setup.sh"

  config.vm.provider "virtualbox" do |vb|
    # Set the next line to false to run headless (no VM window). User/pass = vagrant/vagrant
    vb.gui = true

    vb.name = "MongoDBU_vm"
    vb.customize ["modifyvm", :id, "--memory", "2048"]
    vb.customize ["modifyvm", :id, "--cpus", "1"]
  end

  config.vm.provider "vmware_fusion" do |v|
    # Set the next line to false to run headless (no VM window). User/pass = vagrant/vagrant
    v.gui = true

    v.vmx["displayname"] = "MongoDBU"
    v.vmx["memsize"] = "2048"
    v.vmx["numvcpus"] = "1"
  end

  config.vm.provider "vmware_workstation" do |v|
    # Set the next line to false to run headless (no VM window). User/pass = vagrant/vagrant
    v.gui = true

    v.vmx["displayname"] = "MongoDBU"
    v.vmx["memsize"] = "2048"
    v.vmx["numvcpus"] = "1"
  end

end
--------------------------------------------------------------------------------
/vagrant/m202-ubuntu1404/Vagrantfile:
--------------------------------------------------------------------------------
# -*- mode: ruby -*-
# vi: set ft=ruby :

# Vagrantfile API/syntax version. Don't touch unless you know what you're doing!
VAGRANTFILE_API_VERSION = "2"

Vagrant.configure(VAGRANTFILE_API_VERSION) do |config|

  config.vm.box = "m202-ubuntu1404"
  config.vm.box_url = "http://127.0.0.1/~adam/m202-ubuntu1404.box"

  config.vm.provision "shell", path: "provisioners/setup.sh"

  config.vm.provider "virtualbox" do |vb|
    # Set the next line to false to run headless (no VM window). User/pass = m202/m202
    vb.gui = true

    vb.name = "m202-ubuntu1404"
    vb.customize ["modifyvm", :id, "--memory", "2048"]
    vb.customize ["modifyvm", :id, "--cpus", "1"]
  end

  config.vm.provider "vmware_fusion" do |v|
    # Set the next line to false to run headless (no VM window). User/pass = m202/m202
    v.gui = true

    v.vmx["displayname"] = "m202-ubuntu1404"
    v.vmx["memsize"] = "2048"
    v.vmx["numvcpus"] = "1"
  end

  config.vm.provider "vmware_workstation" do |v|
    # Set the next line to false to run headless (no VM window). User/pass = m202/m202
    v.gui = true

    v.vmx["displayname"] = "m202-ubuntu1404"
    v.vmx["memsize"] = "2048"
    v.vmx["numvcpus"] = "1"
  end

  config.ssh.username = "m202"
end
--------------------------------------------------------------------------------
/compactness.js:
--------------------------------------------------------------------------------
// original credit for this goes to https://github.com/achille
// compactness() calculates how closely the resulting documents are located together
// It compares the size of the documents vs the size of the unique 4KB pages they reside on

function compactness(collection, query, limit) {
    "use strict";
    Object.size = function(o) {
        var size = 0, key;
        for (key in o) { if (o.hasOwnProperty(key)) size++; }
        return size;
    };

    var count = 0,
        size = 0;
    var disklocs = {}; // will store each disk loc, format: file-loc%4kb, ie file-0, file-4096, etc

    db.getCollection(collection).find(query).limit(limit).showDiskLoc().forEach(
        function(doc) {
            var file = doc.$diskLoc.file,
                offset = doc.$diskLoc.offset,
                offsetPage = offset - offset % 4096;
            count++;
            size += Object.bsonsize(doc) - 45; // $diskLoc info adds 45 bytes
            disklocs[file + "-" + offsetPage] = 1;
        }
    );
    var numpages = Object.size(disklocs);
    var numbytespages = 1024 * 4 * numpages;
    print("Size of returned data in bytes: " + size);
    print("Size of pages touched by data : " + numbytespages);
    print("Compactness: " + Math.floor(100 * size / numbytespages) + "%");
}
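
// A minimal usage sketch (collection name, query and limit are illustrative;
// run after selecting the relevant database with "use"):
//
//   load("compactness.js");
//   compactness("data", {}, 1000);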
--------------------------------------------------------------------------------
/mongostat-demangler.sh:
--------------------------------------------------------------------------------
#!/bin/bash

# Normalizes mongostat output for easier processing: pads the third (update)
# column with "|0" where it is missing, strips noise lines, splits the "|"
# separated and "db:percent" style fields into their own columns, expands
# k/m/g suffixes into plain numbers, moves the time column to the front, and
# aligns everything into columns.

cat "$@" \
| awk '
    {
        if (! /^insert/ && $3 !~ /\|/) {
            $3 = $3 "|0";
        }
        print;
    }
' \
| sed \
    -e '/^connected to/d' \
    -e '/can.t get data/d' \
    -e '/reconnect/d' \
    -e '/DBClient/d' \
    -e 's/\*//g' \
    -e 's/|/ /g' \
    -e 's/:\([0-9.]\+%\)/ \1/' \
| awk '
    BEGIN {
        print "#time insert query update update_r delete getmore command command_r flushes mapped vsize res non-mapped faults locked_db lock% idx_miss_% qr qw ar aw netIn netOut conn set repl";
        header = 0;
    }

    {
        if (/^insert/) {
            if (!header) {
                $0 = gensub("^", "#time ", "", $0);
                $0 = gensub("time *$", "", "", $0);
                $0 = gensub("update", "update update_r", "", $0);
                $0 = gensub("command", "command command_r", "", $0);
                $0 = gensub("locked db", "locked_db lock%", "", $0);
                $0 = gensub("idx miss %", "idx_miss_%", "", $0);
                for (i = 1; i <= NF; i++) {
                    printf("%s(%d)%s", (i==1)?"#":"", i, (i==NF)?"\n":" ");
                }
                print;
                #header = 1;
            }

        } else {

            for (i = 1; i <= NF; i++) {
                if ($i ~ /k$/) {
                    $i = 1000 * gensub("k$", "", "", $i);
                }
                if ($i ~ /m$/) {
                    $i = 1000000 * gensub("m$", "", "", $i);
                }
                if ($i ~ /g$/) {
                    $i = 1000000000 * gensub("g$", "", "", $i);
                }
            }
            if ($1 >= 0) {
                # move the trailing time field to the front
                time = $NF;
                for (i = NF; i > 1; i--) {
                    $i = $(i-1);
                }
                $1 = time;
                print;
            }
        }
    }
' \
| column -tn
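
# A usage sketch (host and interval are illustrative): capture some mongostat
# output to a file, then run it through the demangler to get clean columns:
#
#   mongostat --host localhost:27017 5 > mongostat.raw
#   ./mongostat-demangler.sh mongostat.raw > mongostat.clean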
--------------------------------------------------------------------------------
/AllChunkInfo.js:
--------------------------------------------------------------------------------
// This is a simple function, which takes just two arguments:
// ns: a string representing the sharded namespace to be examined
// est: a boolean to determine whether or not to use the estimate option (recommended generally)

// It is called from the mongos like so:

// sh.printAllChunkInfo("database.collection", true);
// Currently the per-chunk output is CSV, will add options for other output later

sh.printAllChunkInfo = function(ns, est) {
    var configDB = db.getSiblingDB("config");
    var chunks = configDB.chunks.find({ns : ns}).sort({min : 1});
    var key = configDB.collections.findOne({_id : ns}).key;
    var total = { chunks : 0, objs : 0, size : 0, empty : 0 };
    var shards = {};
    configDB.shards.find().toArray().forEach( function (shard) {
        shards[shard._id] = { chunks : 0, objs : 0, size : 0, empty : 0 };
    } );
    print("ChunkID,Shard,ChunkSize,ObjectsInChunk");
    chunks.forEach( function printChunkInfo(chunk) {
        var res = db.getSiblingDB(chunk.ns.split(".")[0]).runCommand({ datasize : chunk.ns, keyPattern : key, min : chunk.min, max : chunk.max, estimate : est });
        print(chunk._id + "," + chunk.shard + "," + res.size + "," + res.numObjects);
        (function(stats) {
            for (stat in stats) {
                stats[stat].chunks++;
                stats[stat].objs += res.numObjects;
                stats[stat].size += res.size;
                if (res.size == 0) stats[stat].empty++;
            }
        })( [ total, shards[chunk.shard] ] );
    } );

    function printStats(s, indent) {
        print(indent + "Total Chunks: " + s.chunks + " (" + s.empty + " empty)");
        print(indent + "Total Size: " + s.size + " bytes");
        print(indent + "Average Chunk Size: " + (s.size/s.chunks) + " bytes");
        print(indent + "Average Non-empty Chunk Size: " + (s.size/(s.chunks-s.empty)) + " bytes");
    }

    print("");
    print("*********** Summary Information ***********");
    printStats(total, "");

    print("");
    print("*********** Per-Shard Information ***********");
    for (shard in shards) {
        print("Shard " + shard + ":");
        printStats(shards[shard], "  ");
    }
}
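
// A minimal usage sketch (the namespace is illustrative; run from a mongos):
//
//   load("AllChunkInfo.js");
//   sh.printAllChunkInfo("chunkTest.foo", true);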
--------------------------------------------------------------------------------
/two_shards_1m_docs.js:
--------------------------------------------------------------------------------
// NOTE: This is not pure JavaScript and cannot be run as such
// It is a series of instructions, some of which need to be run from the command line, others must be run from a MongoDB shell
// The comments before the instructions will describe where to run each piece
// The command line instructions assume that the MongoDB binaries are in your current working directory on a Unix-like system

// Start a mongo shell from the command line, this will be used to create your test cluster

./mongo --nodb

// Once you have a mongo shell, create a test cluster
// We'll just start with 2 shards, and a small chunk size (handy for getting lots of chunks and testing the balancer)
cluster = new ShardingTest({shards : 2, chunksize : 1});
// You will now see a lot of logging on this terminal, and I find it useful to keep it this way when testing

// So, start a new mongo shell - this time connecting to the mongos you just created
./mongo --port 30999

// Now that we again have a mongo shell, this time connected to the mongos, let's call the DB chunkTest
use chunkTest;
// enable sharding on the DB
sh.enableSharding("chunkTest");
// Then create a sharded collection, we'll just call the collection "foo"
// Since this is just a test, we will shard based on _id - this would generally be a bad idea for production
sh.shardCollection("chunkTest.foo", {"_id" : 1});
// Optional - uncomment the following to disable the balancer before inserting - this will put all data on one shard initially
// sh.stopBalancer();
// Check the balancer state
sh.getBalancerState();
// Now insert 1,000,000 docs (increase as necessary) - this may take a while
// In this case, I've put in a couple of random fields, and overridden the _id with the integer counter in the for loop
for(var i = 0; i <= 1000000; i++){ db.foo.insert({"_id" : i, "date" : new Date(), "otherID" : new ObjectId()}); };
// and that's it - you now have a 2 shard cluster with 1,000,000 docs to play with
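
// (Optional) A quick sketch for checking how the chunks ended up distributed
// across the two shards - run this from the mongos shell (the namespace
// matches the collection created above):
db.getSiblingDB("config").chunks.aggregate([
    {$match : {ns : "chunkTest.foo"}},
    {$group : {_id : "$shard", chunks : {$sum : 1}}}
]);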
--------------------------------------------------------------------------------
/pre_alloc.bash:
--------------------------------------------------------------------------------
#!/bin/bash
#
# Tool to pre-allocate MongoDB data files for the MMAPv0/1 storage engine
#
# Requirements: fallocate command from Google ftools - https://code.google.com/p/linux-ftools/#fallocate
#
# Author: Adam Comerford (adam@comerford.cc)
#
# Options:
#
# -s - size of data files to allocate in MB, not including namespace file (default: 192)
# -f - path to fallocate binary (default is to just call the fallocate command, look in PATH)
# -n - name of database (default: data)
# -d - where to place the files (default: /data/db)

# Error/safety checks
#
# TODO - check for existing files first, if they exist, bail out and carp
# TODO - check for sufficient free space on target device
# TODO - add smallfiles option (divide by 4)

# set the defaults
SIZE=192
NAME="data"
FBINARY="fallocate"
DBPATH="/data/db"


# Parse arguments, overwrite defaults when necessary, error if invalid arg passed
while getopts ":s:n:f:d:" opt; do
    case $opt in
        s) SIZE="$OPTARG"
            ;;
        n) NAME="$OPTARG"
            ;;
        f) FBINARY="$OPTARG"
            ;;
        d) DBPATH="$OPTARG"
            ;;
        \?) echo "Invalid option -$OPTARG" >&2
            ;;
    esac
done

command -v $FBINARY >/dev/null || { echo "fallocate command not found in PATH, cannot continue, please install util-linux package or similar, or provide the full path to the command."; exit 1; }

# Create namespace file first - always needed

$FBINARY -l $((1024 * 1024 * 16)) $DBPATH/$NAME.ns

# calculate the number of files that will be required
# 4032 is the magic number (64+128+256+512+1024+2048), anything beyond that will have multiples of 2048
NUMFILES=0

if [ $SIZE -le 4032 ] ;
then
    # only a few cases to deal with here
    if [ $SIZE -le 192 ] ;
    then
        NUMFILES=2
    elif [ $SIZE -le 448 ] ;
    then
        NUMFILES=3
    elif [ $SIZE -le 960 ] ;
    then
        NUMFILES=4
    elif [ $SIZE -le 1984 ] ;
    then
        NUMFILES=5
    else
        NUMFILES=6
    fi
else
    # for larger than 4032, will always be 7 plus however many 2048MB files are needed additionally
    NUMFILES=$(( (($SIZE - 4032)/2048) + 7 ))
fi

ALLOCATED=0
while [ $ALLOCATED -lt $NUMFILES ]; do
    case $ALLOCATED in
        0)
            $FBINARY -l $((1024 * 1024 * 64)) $DBPATH/$NAME.$ALLOCATED
            ((ALLOCATED++))
            ;;
        1)
            $FBINARY -l $((1024 * 1024 * 128)) $DBPATH/$NAME.$ALLOCATED
            ((ALLOCATED++))
            ;;
        2)
            $FBINARY -l $((1024 * 1024 * 256)) $DBPATH/$NAME.$ALLOCATED
            ((ALLOCATED++))
            ;;
        3)
            $FBINARY -l $((1024 * 1024 * 512)) $DBPATH/$NAME.$ALLOCATED
            ((ALLOCATED++))
            ;;
        4)
            $FBINARY -l $((1024 * 1024 * 1024)) $DBPATH/$NAME.$ALLOCATED
            ((ALLOCATED++))
            ;;
        *)
            $FBINARY -l $((1024 * 1024 * 2048)) $DBPATH/$NAME.$ALLOCATED
            ((ALLOCATED++))
            ;;
    esac
done
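
# A usage sketch (values are illustrative): pre-allocate ~4GB of data files
# for a database named "test" under /data/db:
#
#   ./pre_alloc.bash -s 4096 -n test -d /data/db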
--------------------------------------------------------------------------------
/crud.js:
--------------------------------------------------------------------------------
// Some functions for inserting quickly (unack'ed writes) (Create)
// Preheating data to get it into memory (Read)
// Changing the data, with/without growth (Update)
// Removing data (Delete)

// First function is used to create a random set of data, the C in CRUD
// Defaults/Assumptions: _id index only, collection always called "data"
// Takes 4 arguments, the last one optional:
// numGB is the approximate data size to create in GiB (integer usually, but any number should work)
// dbName is the database to use (the collection is always called data)
// usePowerOf2 is a boolean to allow you to select the storage strategy
// delay is optional - it will introduce a sleep into the loop to slow down the operations, defaults to 0

createData = function(numGB, dbName, usePowerOf2, delay) {
    var db1 = db.getSiblingDB(dbName);
    // set powerOf2 per the boolean, but need to handle it differently depending on whether the collection already exists
    // NOTE that the second option will turn it off for all new collections
    if(db1.data.findOne()){
        db1.runCommand({ collMod : "data", usePowerOf2Sizes : usePowerOf2 });
    } else {
        db1.adminCommand({ setParameter : 1, newCollectionsUsePowerOf2Sizes : usePowerOf2 });
    };
    // set the delay as passed in, with the default of 0 as a fallback
    delay = typeof delay !== 'undefined' ? delay : 0;
    // check the shell version, if 2.5+ set legacy mode for unacked writes (for speed)
    var shellVersion = version().split('.').map(Number);
    if ( shellVersion[0] > 2 ) {
        db1.getMongo().forceWriteMode("legacy");
    } else if (shellVersion[0] == 2) {
        if (shellVersion[1] > 4) {
            db1.getMongo().forceWriteMode("legacy");
        }
    };

    // with the document we are using, 68 iterations of this loop will get you ~1033MiB of data (not including indexes), so use that as a multiplier
    var startTime = new Date();

    for(var j = 0; j < (numGB * 68); j++){
        // going to create a big array of docs, then insert them
        var bigDoc = [];
        for(var i = 0; i < 66400; i++){ // 132800 gets pretty close to the max doc size but takes a bit too long to generate on my machine, leaving gaps, so divide by 2
            var randomNum = Math.random(); // generate a single random number per loop iteration
            var ranDate = new Date(Math.floor(1500000000000 * randomNum));
            // let's construct a random ObjectId based on the number, basically construct a string with the info we need and some randomness
            // first piece is 4 bytes (8 hex digits), need to pad with zeroes for low values (same with random end piece)
            // next pieces per the spec are 6 hex digits for the machine, 4 digits for the PID
            // instead we will insert 10 placeholder characters for expedience
            // then, round things out with 3 bytes of randomness per the spec, and use the increment on the loop to avoid collisions
            var ranString = (Math.floor(randomNum * 1500000000).toString(16)).pad(8, false, 0) + "adacefd123" + ((Math.floor(randomNum * 16710815) + i).toString(16)).pad(6, false, 0);
            // this one would be better, but too slow to generate:
            // var ranString = ((Math.floor(1500000000 * randomNum)).toString(16)).pad(8, false, "0") + db1.version().replace(/\./g, "") + "adacefd" + ((Math.floor(randomNum * 9920329) + i).toString(16)).pad(6, false, "0");
            var ranId = new ObjectId(ranString);
            // To explain the document:
            // _id and ranDate are both based on the same randomly generated date, but ranDate has millis and is a bit easier to parse
            // After that we add an integer, boolean and a small array with a string and a number (array is useful for growth later)
            bigDoc.push({_id : ranId, ranDate : ranDate, ranInt : NumberInt(randomNum * 1000000), ranBool : (randomNum < 0.5 ? true : false), smallArray : [randomNum, randomNum.toString()]});
            // if there is a non-default delay specified, use it
            if(delay > 0){
                sleep(delay);
            }
        };
        db1.data.insert(bigDoc);

        if(j == (numGB * 34)){
            print("Approximately 50% done: " + (j * 66400) + " docs inserted in " + (new Date() - startTime)/1000 + " seconds");
        };

    };
    var timeTaken = ((new Date() - startTime)/1000);
    print("Run complete: " + (numGB * 68 * 66400) + " docs inserted in " + timeTaken + " seconds. Average insertion rate: " + ((numGB * 68 * 66400)/timeTaken) + " docs per second");
    // clean up the write mode if altered at the top
    if(db1.getMongo().writeMode() == "legacy"){
        db1.getMongo().forceWriteMode("commands");
    }
};

// Sample runs of the createData script on a standalone mongod, both run on same 8 core Linux host, not IO bound
// Single thread CPU for shell was close to max, as was database lock on mongod - results within margin of error for the versions:

// 2.6.3 - Run complete: 4515200 docs inserted in 148.452 seconds. Average insertion rate: 30415.218387088084 docs per second
// 2.4.10 - Run complete: 4515200 docs inserted in 146.916 seconds. Average insertion rate: 30733.20809169866 docs per second
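
// A minimal usage sketch (the database name is illustrative): create ~2GiB of
// data in a database called "crudTest" with powerOf2Sizes on, no insert delay:
//
//   load("crud.js");
//   createData(2, "crudTest", true);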

// Next, the reads - we'll do this randomly across the set
// Takes 2 arguments:
// numGB is the approximate amount of data to touch in GiB (integer usually, but any number should work)
// dbName is the database to use (the collection is always called data)

preHeatRandomData = function(numGB, dbName) {

    // We will brute force this basically, the _id is indexed and we know how it was constructed
    // The first 8 hex digits of the pseudo ObjectID we created have a maximum of 16^8 values, but likely far fewer docs
    // A bit of experimentation tells me that using all 8 digits is too slow (low hit rate)
    // Even 6 digits is still only 256 second ranges and yielded an average of less than 2 docs per range in limited tests
    // Hence, we will do a range query using the first 5 digits plus fixed strings to create the start/end of the range
    // That's 16^3 or 4096 secs, so not an unreasonable range to query in general (1GiB set tests yielded ~12 docs per range)
    // Every time we do the range query, we will call explain, and then increment the results by the nscanned count
    //
    // Note: decent chance there will be collisions, so may need to "oversubscribe" the amount of data to be touched

    var docHits = 0;
    var noHits = 0;
    var iterations = 0; // not really needed other than for stats

    var db1 = db.getSiblingDB(dbName);

    var startTime = new Date(); // time the loop
    while(docHits < (5000000 * numGB)) {
        // the creation of the string is identical to the creation code
        var randomNum = Math.random();
        var ranString = (Math.floor(randomNum * 1500000000).toString(16)).pad(8, false, 0);
        // we just strip the last 3 characters to allow us to create ranges - 3 characters is only 4096 seconds
        ranString = ranString.substring(0, ranString.length - 3);
        var beginId = new ObjectId(ranString + "000adacefd123000000");
        var endId = new ObjectId(ranString + "fffadacefd123ffffff");
        // simple ranged query on _id with an explicit hint and an explain so we exhaust the cursor and get useful stats back
        var result = db1.data.find({_id : {$gte : beginId, $lte : endId}}).hint({_id : 1}).explain();
        if(result.nscanned > 0) {
            docHits += result.nscanned; // increment by number scanned if not empty
        } else {
            noHits++; // record the lack of hits
        };
        iterations++; // total iterations
        // warn about low hit rates at each 250k no hit occurrences
        if((noHits % 250000) == 0 && noHits > 0){
            print("Warning: hit rate is poor - just passed " + noHits + " iterations with no hits (current doc hits are: " + docHits + " out of " + (5000000 * numGB) + " or " + docHits/(50000 * numGB) + "%).");
        };
    };
    var endTime = new Date();
    // some info on the time taken, hit rate etc.
    print(numGB + "GiB of data loaded (" + (numGB * 5000000) + " docs), took " + (endTime - startTime)/1000 + " seconds to complete (average: " + (numGB * 5000000)/((endTime - startTime)/1000) + " docs/sec)");
    print(noHits + " queries hit 0 documents (" + (noHits*100)/iterations + "%) and there were " + iterations + " total iterations.");
    print("Average number of docs scanned per iteration (hits only): " + (numGB * 5000000)/(iterations - noHits));
};

// update docs, optionally making them grow (creates free list)

updateRandomData = function(numGB, dbName, growDocs){

    // quick test shows that with powerOf2Sizes, need to add 9 ObjectIds to the smallArray to trigger a move
    // so growing the docs will take a lot more updates in order to complete the run
    // testing for a move is a little clunky until we get better write command stats, so pushing that to its own function for now
    var db1 = db.getSiblingDB(dbName);
    var updateHits = 0;
    var growthOverhead = 0;
    var startTime = new Date(); // time the loop
    while(updateHits < (5000000 * numGB)){
        var result = null;
        // loop until we have a valid result, regenerating the range on each miss, and we will use the ranInt to not hit docs twice
        while(result == null){
            // we'll re-use the logic from the finds, create a range to look for a candidate document
            var randomNum = Math.random();
            var ranString = (Math.floor(randomNum * 1500000000).toString(16)).pad(8, false, 0);
            // we just strip the last 3 characters to allow us to create ranges - 3 characters is only 4096 seconds
            // this is looking pretty inefficient at finding data in a 2GB data set for testing, may need to increase the ranges
            ranString = ranString.substring(0, ranString.length - 3);
            var beginId = new ObjectId(ranString + "000adacefd123000000");
            var endId = new ObjectId(ranString + "fffadacefd123ffffff");
            // simple find on _id with a hint, checking hasNext() before taking the first doc off the cursor
            // (calling next() on an empty cursor would throw, so a miss just means we try a new range)
            var cursor = db1.data.find({_id : {$gte : beginId, $lte : endId}, ranInt : {$lte : 1000000}}).hint({_id : 1});
            if(cursor.hasNext()){
                result = cursor.next();
            }
        }
        if(growDocs){
            growthOverhead += pushUntilMoved(dbName, result._id, false);
            db1.data.update({_id : result._id}, {$inc : {ranInt : 1000000}});
            updateHits++;
        } else {
            db1.data.update({_id : result._id}, {$inc : {ranInt : 1000000}});
            updateHits++;
        }
    }
    var endTime = new Date();

    if(growDocs){
        print("Updated " + updateHits + " docs in " + (endTime - startTime)/1000 + " seconds (avg: " + (5000000 * numGB)/((endTime - startTime)/1000) + " docs/sec). Growth required an average of " + (growthOverhead/updateHits) + " pushes to the array.");
    } else {
        print("Updated " + updateHits + " docs in " + (endTime - startTime)/1000 + " seconds (avg: " + (5000000 * numGB)/((endTime - startTime)/1000) + " docs/sec).");
    };

}
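
// A usage sketch for the read and update helpers above (the database name is
// illustrative, and assumes createData(2, "crudTest", true) was run first):
//
//   preHeatRandomData(2, "crudTest");
//   updateRandomData(2, "crudTest", true); // true = grow docs to force moves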

// this little function will take an ObjectID, then push new IDs to the smallArray until the document moves on disk
// verbose toggles information about old/new location and number of pushes required (will be more for powerOf2 docs)
// it's needed to provide the move functionality in the update function
pushUntilMoved = function(dbName, docID, verbose){
    var db1 = db.getSiblingDB(dbName);
    var currentLoc = db1.data.find({_id : docID}).showDiskLoc().next().$diskLoc;
    var newLoc = currentLoc;
    var pushes = 0;
    while((currentLoc.file == newLoc.file) && (currentLoc.offset == newLoc.offset)){
        db1.data.update({_id : docID}, {$push : {smallArray : new ObjectId()}});
        newLoc = db1.data.find({_id : docID}).showDiskLoc().next().$diskLoc;
        pushes++;
    }
    if(verbose){
        print("Old location: file: " + currentLoc.file + " offset: " + currentLoc.offset);
        print("New location: file: " + newLoc.file + " offset: " + newLoc.offset);
        print("Pushes required: " + pushes);
    }
    return pushes;
}


// delete docs, create holes and a free list

deleteRandomData = function(numGB, dbName){
    var db1 = db.getSiblingDB(dbName);
    var delHits = 0;

    // this one is actually far more simple in 2.6 with the write results, so writing that first, may not bother with 2.4
    var startTime = new Date(); // time the loop
    while(delHits < (5000000 * numGB)){
        // we'll re-use the logic from the finds/updates, create a range to look for a candidate document
        var randomNum = Math.random();
        var ranString = (Math.floor(randomNum * 1500000000).toString(16)).pad(8, false, 0);
        ranString = ranString.substring(0, ranString.length - 3);
        var beginId = new ObjectId(ranString + "000adacefd123000000");
        var endId = new ObjectId(ranString + "fffadacefd123ffffff");
        var result = db1.data.remove({_id : {$gte : beginId, $lte : endId}}, 1); // just remove one doc at a time
        delHits += result.nRemoved;
    }
    var endTime = new Date();
    print("Removed " + delHits + " docs in " + (endTime - startTime)/1000 + " seconds (avg: " + (5000000 * numGB)/((endTime - startTime)/1000) + " docs/sec).");

}
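
// A usage sketch (illustrative; requires a 2.6+ shell, since the remove
// result's nRemoved field is used above):
//
//   deleteRandomData(2, "crudTest");
--------------------------------------------------------------------------------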