"]
12 | loop do
13 | page = @pages.parent_of(page)
14 | break if page.nil?
15 | list << "
#{link_to_page(page)}
"
16 | end
17 | list.reverse!
18 |
19 | html = "
\n"
20 | html << list.join("\n")
21 | html << "\n
\n"
22 | html
23 | end
24 | end # module Breadcrumbs
25 |
26 | Webby::Helpers.register(BreadcrumbsHelper)
27 |
28 | # EOF
29 |
--------------------------------------------------------------------------------
/lib/webby/page.rb:
--------------------------------------------------------------------------------
# Monkey patch Webby to produce nicer URLs: every html page is rendered
# into its own directory as /index.html, so the public URL looks like /.
# Credits to Marc-André Cournoyer.
module Webby::Resources
  class Page < Resource
    # Filesystem path the rendered page is written to. "Pretty" pages are
    # moved into a directory named after the page, as index.html.
    def destination
      path = super
      return path unless prettify?
      File.join(File.dirname(path), File.basename(path, ".*"), "index.html")
    end

    # Public URL of the page; the trailing "index.html" is stripped for
    # prettified pages so the URL reads as a bare directory path.
    def url
      prettify? ? super.sub(/index\.html$/, "") : super
    end

    private

    # Only html pages other than the top-level index are prettified.
    def prettify?
      filename != "index" && extension == "html"
    end
  end
end
32 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | The MongoDB Cookbook is inspired by The Redis Cookbook, Copyright (c)
2 | 2010 Ted Nyman [http://ted.io], Tim Lossen [http://tim.lossen.de]
3 |
4 | This work is licensed under the Creative Commons Attribution
5 | Share Alike Unported License (Version 3.0):
6 |
7 |   http://creativecommons.org/licenses/by-sa/3.0/legalcode
8 |
9 | Summary:
10 |
11 | You are free to share (to copy, distribute and transmit) and
12 | to remix (to adapt) this work -- under the following
13 | conditions:
14 |
15 | (a) You must attribute the work in the manner specified by the
16 | author or licensor (but not in any way that suggests that they
17 | endorse you or your use of the work).
18 |
19 | (b) If you alter, transform, or build upon this work, you may
20 | distribute the resulting work only under the same, similar or
21 | a compatible license.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # The MongoDB Cookbook
2 |
3 | ### The ways and hows of MongoDB.
4 |
5 | Contribute your patterns, methods, and ideas to the MongoDB Cookbook.
6 |
7 | - - -
8 |
9 | ### How to Contribute a Recipe
10 |
11 | 1. Fork this repo and create a new topic branch.
12 | 2. Make your addition. See one of the recipe files for a sample.
13 | 3. Send a pull request -- please include a short description of your new or updated recipe.
14 | 4. Enjoy your awesomeness.
15 |
16 | ### Even More Ways To Contribute
17 |
18 | You can:
19 |
20 | * Add example code for any existing recipe, in any programming language.
21 | Fork the repo and add your code into the relevant directory!
22 | * Work on the cookbook.mongodb.org website. All the site lives in `content`.
23 | * Look for typos, formatting errors, missing links, and other little things.
24 | No potential improvement is 'too small' -- fork for anything.
25 |
26 | ### License
27 |
28 | Creative Commons Attribution Share Alike 3.0
29 | Inspired by The Redis Cookbook (http://rediscookbook.org)
30 |
31 | ### Gems to Install
32 | * gem install webby
33 | * gem install ultraviolet
34 | * gem install maruku
35 | * gem install RedCloth
36 | * gem install rdiscount
37 |
--------------------------------------------------------------------------------
/content/index.txt:
--------------------------------------------------------------------------------
1 | ---
2 | title: The MongoDB Cookbook
3 | created_at: 2008-08-02 14:06:40.000000 -06:00
4 | dirty: true
5 | filter:
6 | - erb
7 | ---
8 |
A Cookbook for MongoDB
9 |
Welcome to the MongoDB cookbook. Here, we hope to provide guidance on all the common ways of using MongoDB. Got any wisdom to share? You can submit a recipe via GitHub by following the instructions in the README.
10 |
11 |
12 |
13 | If you're new to MongoDB, be sure to check out:
14 |
Can't find the answer you're looking for? Send a question to the MongoDB User List or check out #mongodb on irc.freenode.net.
35 |
--------------------------------------------------------------------------------
/templates/page.erb:
--------------------------------------------------------------------------------
1 | ---
2 | title: <%= title %>
3 | created_at: <%= Time.now.to_y %>
4 | filter:
5 | - erb
6 | - textile
7 | ---
8 | p(title). <%%= h(@page.title) %>
9 |
10 | Lorem ipsum dolor sit amet, consectetuer adipiscing elit. Nunc congue ipsum vestibulum libero. Aenean vitae justo. Nam eget tellus. Etiam convallis, est eu lobortis mattis, lectus tellus tempus felis, a ultricies erat ipsum at metus.
11 |
12 | h2. Litora Sociis
13 |
14 | Lorem ipsum dolor sit amet, consectetuer adipiscing elit. Morbi et risus. Aliquam nisl. Nulla facilisi. Cras accumsan vestibulum ante. Vestibulum sed tortor. Praesent tempus fringilla elit. Ut elit diam, sagittis in, nonummy in, gravida non, nunc. Ut orci. Class aptent taciti sociosqu ad litora torquent per conubia nostra, per inceptos hymenaeos. Nam egestas, orci eu imperdiet malesuada, nisl purus fringilla odio, quis commodo est orci vitae justo. Aliquam placerat odio tincidunt nulla. Cras in libero. Aenean rutrum, magna non tristique posuere, erat odio eleifend nisl, non convallis est tortor blandit ligula. Nulla id augue.
15 |
16 | bq. Nullam mattis, odio ut tempus facilisis, metus nisl facilisis metus, auctor consectetuer felis ligula nec mauris. Vestibulum odio erat, fermentum at, commodo vitae, ultrices et, urna. Mauris vulputate, mi pulvinar sagittis condimentum, sem nulla aliquam velit, sed imperdiet mi purus eu magna. Nulla varius metus ut eros. Aenean aliquet magna eget orci. Class aptent taciti sociosqu ad litora.
17 |
18 | Vivamus euismod. Cum sociis natoque penatibus et magnis dis parturient montes, nascetur ridiculus mus. Suspendisse vel nibh ut turpis dictum sagittis. Aliquam vel velit a elit auctor sollicitudin. Nam vel dui vel neque lacinia pretium. Quisque nunc erat, venenatis id, volutpat ut, scelerisque sed, diam. Mauris ante. Pellentesque habitant morbi tristique senectus et netus et malesuada fames ac turpis egestas. Donec mattis. Morbi dignissim sollicitudin libero. Nulla lorem.
19 |
--------------------------------------------------------------------------------
/content/css/style.css:
--------------------------------------------------------------------------------
1 | body {
2 | font-family: "Helvetica Neue", Helvetica, Arial, sans-serif;
3 | font-size: 14px;
4 | margin: 0;
5 | color: #303030;
6 | }
7 |
8 | a {
9 | text-decoration: none;
10 | color: #3471CD;
11 | }
12 |
13 | a:hover {
14 | text-decoration: underline;
15 | color: #3471CD;
16 | }
17 |
18 | hr {
19 | width: 100%;
20 | border-top: 1px dotted #999;
21 | }
22 |
23 | p {
24 | font-size: 14px;
25 | }
26 |
27 | h1 {
28 | font-size: 30px;
29 | }
30 |
31 | h2 {
32 | font-size: 25px;
33 | }
34 |
35 | h3 {
36 | font-size:18px;
37 | }
38 |
39 | h4 {
40 | font-size:14px;
41 | }
42 |
43 | body code {
44 | color: #444;
45 | font-size: 120%;
46 | }
47 |
48 | body pre {
49 | padding: 1em;
50 | border: 1px solid #dfe2e5;
51 | overflow-x: auto;
52 | }
53 |
54 | .header {
55 | padding-top: 20px;
56 | padding-left: 100px;
57 | top: 0px;
58 | height: 120px;
59 | background-color: #3F2916;
60 | border-bottom:2px solid #999;
61 | }
62 |
63 | .wrapper {
64 | width: 900px;
65 | margin-top:40px;
66 | margin-bottom:100px;
67 | }
68 |
69 | .main {
70 | float:left;
71 | padding-left: 100px;
72 | margin-bottom: 40px;
73 | width: 750px;
74 | }
75 |
76 | .credit {
77 | font-style: italic;
78 | }
79 |
80 | .index li {
81 | list-style-image: url("/img/icon1.png");
82 | list-style-type: square;
83 | }
84 |
85 | .footer {
86 | font-size: 13px;
87 | clear: both;
88 | padding: 25px 0 30px 100px;
89 | border-top: 2px solid #999;
90 | height: 40px;
91 | }
92 |
93 | .large {
94 | font-size: 15px;
95 | line-height: 20px;
96 | }
97 |
98 | ul.breadcrumbs {
99 | list-style-type: none;
100 | padding: 0;
101 | margin: 0;
102 | }
103 |
104 | ul.breadcrumbs li {
105 | float:left;
106 | line-height:2em;
107 | padding-right:.75em;
108 | }
109 |
110 | ul.breadcrumbs li a {
111 | display: block;
112 | }
113 |
114 | .clearfix:after {
115 | content: ".";
116 | display: block;
117 | height: 0;
118 | clear: both;
119 | visibility: hidden;
120 | }
121 |
--------------------------------------------------------------------------------
/content/patterns/finding_max_and_min_values_for_a_key.txt:
--------------------------------------------------------------------------------
1 | ---
2 | title: Finding Max And Min Values for a given Key
3 | created_at: 2010-05-24 20:16:24.036546 -05:00
4 | recipe: true
5 | author: Eliot Horowitz
6 | description: How to use MapReduce to find the min and max values for a given key
7 | filter:
8 | - erb
9 | - markdown
10 | ---
11 |
12 | ### Problem
13 |
14 | You want to find the min and max value for a given field per key.
15 |
16 | <% code 'javascript' do %>
17 | {
18 | "_id" : "post 1",
19 | "author" : "Bob",
20 | "content" : "...",
21 | "page_views" : 5
22 | }
23 | {
24 | "_id" : "post 2",
25 | "author" : "Bob",
26 | "content" : "...",
27 | "page_views" : 9
28 | }
29 | {
30 | "_id" : "post 3",
31 | "author" : "Bob",
32 | "content" : "...",
33 | "page_views" : 8
34 | }
35 | <% end %>
36 |
37 | We want to end up with a collection of authors and their least and most popular posts:
38 |
39 | <% code 'javascript' do %>
40 | { _id : "Bob",
41 | value : { min : { page_views : 5 , _id : "post 1" } ,
42 | max : { page_views : 9 , _id : "post 3" } } }
43 | <% end %>
44 |
45 | ### Solution
46 |
47 | Use the `mapreduce` database command. Emit each author with the post's page_views and _id in the map function,
48 | then use the reduce function to keep the min and max page_views per author.
49 |
50 | #### 1. Map
51 |
52 | <% code 'javascript' do %>
53 | map = function () {
54 | var x = { page_views : this.page_views , _id : this._id };
55 | emit(this.author, { min : x , max : x } )
56 | }
57 | <% end %>
58 |
59 | #### 2. Reduce
60 | <% code 'javascript' do %>
61 | reduce = function (key, values) {
62 | var res = values[0];
63 | for ( var i=1; i<values.length; i++ ) {
64 |     if ( values[i].min.page_views < res.min.page_views )
65 |         res.min = values[i].min;
66 |     if ( values[i].max.page_views > res.max.page_views )
67 |         res.max = values[i].max;
68 | }
69 | return res;
70 | }
71 | <% end %>
72 |
73 | #### 3. Call the `mapreduce` command
74 | <% code 'javascript' do%>
75 | db.posts.mapReduce( map , reduce , { out : { inline : true } } )
76 | <% end %>
77 | ### See Also
78 |
79 | * The MongoDB [docs on mapreduce][1]
80 |
81 | [1]: http://www.mongodb.org/display/DOCS/MapReduce
82 |
--------------------------------------------------------------------------------
/content/patterns/pivot.txt:
--------------------------------------------------------------------------------
1 | ---
2 | title: Pivot Data with Map reduce
3 | created_at: 2011-05-05 18:00:24.036546 -04:00
4 | recipe: true
5 | author: Gaetan Voyer-Perrault
6 | description: How to use map-reduce to pivot table data.
7 | filter:
8 | - erb
9 | - markdown
10 | ---
11 |
12 | ### Problem
13 |
14 | You have a collection of Actors with an array of the Movies they've done.
15 |
16 | You want to generate a collection of Movies with an array of Actors in each.
17 |
18 | Some sample data
19 |
20 | <% code 'javascript' do %>
21 | db.actors.insert( { actor: "Richard Gere", movies: ['Pretty Woman', 'Runaway Bride', 'Chicago'] });
22 | db.actors.insert( { actor: "Julia Roberts", movies: ['Pretty Woman', 'Runaway Bride', 'Erin Brockovich'] });
23 | <% end %>
24 |
25 | ### Solution
26 |
27 | We need to loop through each movie in the Actor document and emit each Movie individually.
28 |
29 | The catch here is in the reduce phase. We cannot emit an array from the reduce phase, so we must build an Actors array inside of the "value" document that is returned.
30 |
31 | #### The code
32 |
33 | <% code 'javascript' do %>
34 | map = function() {
35 | for(var i in this.movies){
36 | key = { movie: this.movies[i] };
37 | value = { actors: [ this.actor ] };
38 | emit(key, value);
39 | }
40 | }
41 |
42 | reduce = function(key, values) {
43 | actor_list = { actors: [] };
44 | for(var i in values) {
45 | actor_list.actors = values[i].actors.concat(actor_list.actors);
46 | }
47 | return actor_list;
48 | }
49 | <% end %>
50 |
51 | Notice how actor_list is actually a javascript object that contains an array. Also notice that map emits the same structure.
52 |
53 | Run the following to execute the map / reduce, output it to the "pivot" collection and print the result:
54 |
55 | <% code 'javascript' do %>
56 | printjson(db.actors.mapReduce(map, reduce, "pivot"));
57 | db.pivot.find().forEach(printjson);
58 | <% end %>
59 |
60 | Here is the sample output, note that "Pretty Woman" and "Runaway Bride" have both "Richard Gere" and "Julia Roberts".
61 |
62 | { "_id" : { "movie" : "Chicago" }, "value" : { "actors" : [ "Richard Gere" ] } }
63 | { "_id" : { "movie" : "Erin Brockovich" }, "value" : { "actors" : [ "Julia Roberts" ] } }
64 | { "_id" : { "movie" : "Pretty Woman" }, "value" : { "actors" : [ "Richard Gere", "Julia Roberts" ] } }
65 | { "_id" : { "movie" : "Runaway Bride" }, "value" : { "actors" : [ "Richard Gere", "Julia Roberts" ] } }
66 |
--------------------------------------------------------------------------------
/content/patterns/date_range.txt:
--------------------------------------------------------------------------------
1 | ---
2 | title: Querying for a Date Range (Specific Month or Day)
3 | created_at: 2010-04-20 15:03:24.036546 -04:00
4 | recipe: true
5 | author: Mike Dirolf
6 | description: How to query for documents from a certain month or day.
7 | filter:
8 | - erb
9 | - markdown
10 | ---
11 |
12 | ### Problem
13 |
14 | You want to list all of the documents in a collection (in the example
15 | we'll use "posts") that were created in a particular month. Each
16 | document in the collection has a field representing the date it was
17 | created:
18 |
19 | <% code 'javascript' do %>
20 | {
21 | "title" : "A blog post",
22 | "author" : "Mike",
23 | "content" : "...",
24 | "created_on" : new Date();
25 | }
26 | <% end %>
27 |
28 | We want to perform a query to get all documents whose value for
29 | `created_on` is in the month of April, 2010.
30 |
31 | ### Solution
32 |
33 | Use a range query to query for documents whose value for `created_on`
34 | is greater than a Date representing the start of the month, and less
35 | than a Date representing the end.
36 |
37 | #### 1. Construct Date objects representing the start and end of the month
38 |
39 | Our first step is to construct Date instances that we can use to do
40 | the range query. In JavaScript:
41 |
42 | <% code 'javascript' do %>
43 | var start = new Date(2010, 3, 1);
44 | var end = new Date(2010, 4, 1);
45 | <% end %>
46 |
47 | Note that in JS the month portion of the Date constructor is
48 | 0-indexed, so the `start` variable above is April 1st and the `end`
49 | variable is May 1st. The logic here is similar in all languages, in Python we'd do:
50 |
51 | <% code 'python' do %>
52 | >>> from datetime import datetime
53 | >>> start = datetime(2010, 4, 1)
54 | >>> end = datetime(2010, 5, 1)
55 | <% end %>
56 |
57 | #### 2. Perform a range query
58 |
59 | Now that we have our reference dates, we can perform a range query to
60 | get the matching documents, note the use of the special `$` operators,
61 | `$gte` (greater-than) and `$lt` (less-than):
62 |
63 | <% code 'javascript' do %>
64 | db.posts.find({created_on: {$gte: start, $lt: end}});
65 | <% end %>
66 |
67 | Again, this translates nicely to other languages - in Python it's:
68 |
69 | <% code 'python' do %>
70 | >>> db.posts.find({"created_on": {"$gte": start, "$lt": end}})
71 | <% end %>
72 |
73 | #### 3. Use an index for performance
74 |
75 | To make these queries fast we can use an index on the `created_on` field:
76 |
77 | <% code 'javascript' do %>
78 | db.posts.ensureIndex({created_on: 1});
79 | <% end %>
80 |
81 | We can also use a compound index if we're performing a query on author
82 | and a date range, like so:
83 |
84 | <% code 'javascript' do %>
85 | db.posts.ensureIndex({author: 1, created_on: 1});
86 | db.posts.find({author: "Mike", created_on: {$gt: start, $lt: end}});
87 | <% end %>
88 |
89 |
--------------------------------------------------------------------------------
/content/patterns/track_max_value_in_array.txt:
--------------------------------------------------------------------------------
1 | ---
2 | title: Track maximum value in array
3 | created_at: 2011-12-14 13:30:00.000000 -04:00
4 | recipe: true
5 | author: Dan Crosta
6 | description: How to keep a "max_value" attribute up to date when pushing values to an array
7 | filter:
8 | - erb
9 | - markdown
10 | ---
11 |
12 | ## Problem
13 |
14 | Your document contains an array of numbers and you want to add an
15 | attribute to the document which contains the maximum value in the array.
16 | You want to ensure that the document is updated safely and atomically so
17 | that this value always represents the maximum value after any number of
18 | additions to the array.
19 |
20 | ### Assumptions
21 |
22 | * You are updating the document by its `_id` or another unique field.
23 | * You know the document already exists (i.e. you are not "upserting.")
24 |
25 | ## Solution
26 |
27 | MongoDB's atomic updates do not allow you to perform in-document
28 | comparisons when updating--that is, there is no operator which will
29 | update a value *if and only if* it is greater than the existing
30 | value. Such an operator would render this recipe trivial.
31 |
32 | However, you can accomplish this task with two invocations of the
33 | `findAndModify` command:
34 |
35 | 1. Issue a `findAndModify` that sets the `max_value` and pushes to the
36 | array at the same time. This operation only succeeds if the
37 | `max_value` is less than or equal to the new value.
38 |
39 | 2. If the previous operation fails, it can only be because `max_value`
40 | is already greater than the new value, so it is safe to push the new
41 | value without regard for `max_value`.
42 |
43 |
44 | To obtain the result of the `findAndModify` command, take the first
45 | result that succeeds and assign it to the `result` variable. Because the
46 | second `findAndModify` only runs if the preceding operations made no
47 | updates, then we know that there can only ever be a single value of
48 | `result`.
49 |
50 | The code for this operation resembles:
51 |
52 | <% code 'javascript' do %>
53 | var result1 = null, result2 = null;
54 |
55 | result1 = db.collection.findAndModify({
56 | query: {_id: ObjectId(...), max_value: {$lte: new_value}},
57 | update: {$push: {array: new_value}, $set: {max_value: new_value}}});
58 |
59 | if (result1 === null ) {
60 |     result2 = db.collection.findAndModify({
61 | query: {_id: ObjectId(...)},
62 | update: {$push: {array: new_value}}});
63 | }
64 |
65 | var result = result1 || result2;
66 | <% end %>
67 |
68 | ## Variations
69 |
70 | If you want the `result` variable to include the changes made by
71 | whichever of the two `findAndModify`s succeeded, add `new: true` to the
72 | arguments to `findAndModify`.
73 |
74 | If you want the `array` attribute of the document to contain a set of
75 | unique values, rather than an array of all values pushed, use the
76 | `$addToSet` operator rather than `$push`.
77 |
--------------------------------------------------------------------------------
/content/patterns/count_tags.txt:
--------------------------------------------------------------------------------
1 | ---
2 | title: Counting Tags
3 | created_at: 2010-04-19 10:05:24.036546 -04:00
4 | recipe: true
5 | author: Kristina Chodorow
6 | description: How to use MapReduce to find the tags for a given collection.
7 | filter:
8 | - erb
9 | - markdown
10 | ---
11 |
12 | ### Problem
13 |
14 | You want to create a tag cloud or see what the most popular tags are in a given
15 | collection, say, "posts". Each document in the collection has an array of tags,
16 | such as:
17 |
18 | <% code 'javascript' do %>
19 | {
20 | "title" : "A blog post",
21 | "author" : "Kristina",
22 | "content" : "...",
23 | "tags" : ["MongoDB", "Map/Reduce", "Recipe"]
24 | }
25 | <% end %>
26 |
27 | We want to end up with a "tags" collection that has documents that look like
28 | this:
29 |
30 | <% code 'javascript' do %>
31 | {"_id" : "MongoDB", "value" : 4}
32 | {"_id" : "Map/Reduce", "value" : 2}
33 | {"_id" : "Recipe", "value" : 7}
34 | {"_id" : "Group", "value" : 1}
35 | <% end %>
36 |
37 | ### Solution
38 |
39 | Use the `mapreduce` database command. Emit each tag in the map function, then
40 | count them in the reduce function.
41 |
42 | #### 1. Map
43 |
44 | The map function first checks if there is a tags field, as running a for-loop on
45 | undef would cause an error. Once that has been established, we go through each
46 | element, emitting the tag name and a count of 1:
47 |
48 | <% code 'javascript' do %>
49 | map = function() {
50 | if (!this.tags) {
51 | return;
52 | }
53 |
54 | for (index in this.tags) {
55 | emit(this.tags[index], 1);
56 | }
57 | }
58 | <% end %>
59 |
60 | #### 2. Reduce
61 |
62 | For the reduce function, we initialize a counter to 0 and then add each element
63 | of the `current` array to it. Then we return the final count.
64 |
65 | <% code 'javascript' do %>
66 | reduce = function(previous, current) {
67 | var count = 0;
68 |
69 | for (index in current) {
70 | count += current[index];
71 | }
72 |
73 | return count;
74 | }
75 | <% end %>
76 |
77 | #### 3. Call the `mapreduce` command
78 |
79 | We want to put the results in the "tags" collection, so we'll specify that with
80 | the `out` parameter:
81 |
82 | <% code 'javascript' do %>
83 | > result = db.runCommand({
84 | ... "mapreduce" : "posts",
85 | ... "map" : map,
86 | ... "reduce" : reduce,
87 | ... "out" : "tags"})
88 | <% end %>
89 |
90 | Now, if we query the tags collection, we find:
91 |
92 | <% code 'javascript' do %>
93 | > db.tags.find()
94 | {"_id" : "MongoDB", "value" : 4}
95 | {"_id" : "Map/Reduce", "value" : 2}
96 | {"_id" : "Recipe", "value" : 7}
97 | {"_id" : "Group", "value" : 1}
98 | <% end %>
99 |
100 | ### See Also
101 |
102 | * The MongoDB [docs on aggregation][1]
103 | * [Map-Reduce Basics][2] by Kyle Banker
104 | * [MapReduce: the Fanfiction][3] by Kristina Chodorow
105 |
106 | [1]: http://www.mongodb.org/display/DOCS/Aggregation
107 | [2]: http://kylebanker.com/blog/2009/12/mongodb-map-reduce-basics/
108 | [3]: http://www.snailinaturtleneck.com/blog/2010/03/15/mapreduce-the-fanfiction/
109 |
110 |
--------------------------------------------------------------------------------
/content/css/code.css:
--------------------------------------------------------------------------------
1 | pre.twilight .DiffInserted {
2 | background-color: #253B22;
3 | color: #F8F8F8;
4 | }
5 | pre.twilight .DiffHeader {
6 | background-color: #0E2231;
7 | color: #F8F8F8;
8 | font-style: italic;
9 | }
10 | pre.twilight .CssPropertyValue {
11 | color: #F9EE98;
12 | }
13 | pre.twilight .CCCPreprocessorDirective {
14 | color: #AFC4DB;
15 | }
16 | pre.twilight .Constant {
17 | color: #CF6A4C;
18 | }
19 | pre.twilight .DiffChanged {
20 | background-color: #4A410D;
21 | color: #F8F8F8;
22 | }
23 | pre.twilight .EmbeddedSource {
24 | background-color: #A3A6AD;
25 | }
26 | pre.twilight .Support {
27 | color: #9B859D;
28 | }
29 | pre.twilight .MarkupList {
30 | color: #F9EE98;
31 | }
32 | pre.twilight .CssConstructorArgument {
33 | color: #8F9D6A;
34 | }
35 | pre.twilight .Storage {
36 | color: #F9EE98;
37 | }
38 | pre.twilight .line-numbers {
39 | background-color: #5C5B51;
40 | color: #D1D0B8;
41 | }
42 | pre.twilight .CssClass {
43 | color: #9B703F;
44 | }
45 | pre.twilight .StringConstant {
46 | color: #DDF2A4;
47 | }
48 | pre.twilight .CssAtRule {
49 | color: #8693A5;
50 | }
51 | pre.twilight .MetaTagInline {
52 | color: #E0C589;
53 | }
54 | pre.twilight .MarkupHeading {
55 | color: #CF6A4C;
56 | }
57 | pre.twilight .CssTagName {
58 | color: #CDA869;
59 | }
60 | pre.twilight .SupportConstant {
61 | color: #CF6A4C;
62 | }
63 | pre.twilight .DiffDeleted {
64 | background-color: #420E09;
65 | color: #F8F8F8;
66 | }
67 | pre.twilight .CCCPreprocessorLine {
68 | color: #8996A8;
69 | }
70 | pre.twilight .StringRegexpSpecial {
71 | color: #CF7D34;
72 | }
73 | pre.twilight .EmbeddedSourceBright {
74 | background-color: #9C9EA4;
75 | }
76 | pre.twilight .InvalidIllegal {
77 | background-color: #241A24;
78 | color: #F8F8F8;
79 | }
80 | pre.twilight .SupportFunction {
81 | color: #DAD085;
82 | }
83 | pre.twilight .CssAdditionalConstants {
84 | color: #CA7840;
85 | }
86 | pre.twilight .MetaTagAll {
87 | color: #AC885B;
88 | }
89 | pre.twilight .StringRegexp {
90 | color: #E9C062;
91 | }
92 | pre.twilight .StringEmbeddedSource {
93 | color: #DAEFA3;
94 | }
95 | pre.twilight .EntityInheritedClass {
96 | color: #9B5C2E;
97 | font-style: italic;
98 | }
99 | pre.twilight .CssId {
100 | color: #8B98AB;
101 | }
102 | pre.twilight .CssPseudoClass {
103 | color: #8F9D6A;
104 | }
105 | pre.twilight .StringVariable {
106 | color: #8A9A95;
107 | }
108 | pre.twilight .String {
109 | color: #8F9D6A;
110 | }
111 | pre.twilight .Keyword {
112 | color: #CDA869;
113 | }
114 | pre.twilight {
115 | background-color: #141414;
116 | color: #F8F8F8;
117 | }
118 | pre.twilight .CssPropertyName {
119 | color: #C5AF75;
120 | }
121 | pre.twilight .DoctypeXmlProcessing {
122 | color: #494949;
123 | }
124 | pre.twilight .InvalidDeprecated {
125 | color: #D2A8A1;
126 | font-style: italic;
127 | }
128 | pre.twilight .Variable {
129 | color: #7587A6;
130 | }
131 | pre.twilight .Entity {
132 | color: #9B703F;
133 | }
134 | pre.twilight .Comment {
135 | color: #5F5A60;
136 | font-style: italic;
137 | }
138 |
139 |
--------------------------------------------------------------------------------
/content/patterns/votes.txt:
--------------------------------------------------------------------------------
1 | ---
2 | title: Voting with Atomic Operators
3 | created_at: 2010-04-19 10:05:24.036546 -04:00
4 | recipe: true
5 | author: Kyle Banker
6 | description: How to use MongoDB atomic operators to implement efficient voting.
7 | filter:
8 | - erb
9 | - markdown
10 | ---
11 |
12 | ### Problem
13 |
14 | You want to give your users the ability to vote on things. Whether it's articles,
15 | comments, photos, or tweets, it seems like everything needs voteability.
16 |
17 | * Make sure that each user gets just one vote.
18 | * Keep a counter cache on the number of votes.
19 |
20 | ### Solution
21 |
22 | The solution is provided in JavaScript; translating to the language of your choice
23 | should be pretty straightforward.
24 |
25 | #### 1. Store the vote information in the object itself.
26 |
27 | Let's say you're building a social news site like Digg. You want your users to be able
28 | to vote on submitted stories. Here's a sample story document with all the information
29 | required for voting:
30 |
31 | <% code 'javascript' do %>
32 | {'_id': ObjectId("4bcc9e697e020f2d44471d27"),
33 | title: 'Aliens discovered on Mars!',
34 | description: 'Martian',
35 | vote_count: 0,
36 | voters: []
37 | }
38 | <% end %>
39 |
40 | Notice that we've reserved two fields for voting: the first is an integer caching the number of votes,
41 | and the second is a list of voters.
42 |
43 | #### 2. Use an atomic update operation for adding and removing votes.
44 |
45 | Here you get to see what's great about atomic operators. You can reliably add the vote, without
46 | risking a duplicate, in a single operation. Here's the code to update the story above:
47 |
48 | <% code 'javascript' do %>
49 | // Get the user id who's voting
50 | user_id = ObjectId("4bcc9e697e020f2d44471a15");
51 |
52 | // This query succeeds only if the voters array doesn't contain the user
53 | query = {_id: ObjectId("4bcc9e697e020f2d44471d27"), voters: {'$ne': user_id}};
54 |
55 | // Update to add the user to the array and increment the number of votes.
56 | update = {'$push': {'voters': user_id}, '$inc': {vote_count: 1}}
57 |
58 | db.stories.update(query, update);
59 | <% end %>
60 |
61 | #### 3. If you want to allow users to retract their votes, the code is quite similar:
62 |
63 | The only difference is that we use the **$pull** operator, and we decrement by passing
64 | -1 to **$inc**.
65 |
66 | <% code 'javascript' do %>
67 | // This query succeeds when the voter has already voted on the story.
68 | query = {_id: ObjectId("4bcc9e697e020f2d44471d27"), voters: user_id};
69 |
70 | // Update to remove the user from the array and decrement the number of votes.
71 | update = {'$pull': {'voters': user_id}, '$inc': {vote_count: -1}}
72 |
73 | db.stories.update(query, update);
74 | <% end %>
75 |
76 | ### Discussion
77 |
78 | One thing to note is that because the operation of step 2 uses the **$ne** operator, that part of the query
79 | can't use an index. This may become a problem if you expect many hundreds of votes per story; any fewer
80 | shouldn't be a concern.
81 |
82 | By contrast, the query in step 3 _can_ use a compound index efficiently:
83 |
84 | <% code 'javascript' do %>
85 | db.stories.ensureIndex({'_id': 1, voters: 1});
86 | <% end %>
87 |
88 | However, you'd create this index only if you expect people to be changing their votes often (which usually
89 | isn't the case).
90 |
--------------------------------------------------------------------------------
/layouts/default.txt:
--------------------------------------------------------------------------------
1 | ---
2 | extension: html
3 | filter: erb
4 | description: A cookbook full of mouth-watering MongoDB recipes.
5 | ---
6 |
7 |
8 | <% if @page.recipe %>
9 | <%= @page.title %> | The MongoDB Cookbook
10 | <% else %>
11 | The MongoDB Cookbook
12 | <% end %>
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
38 | <% end %>
39 | <%= @content %>
40 |
41 | <% if @page.recipe %>
42 |
43 |
44 |
45 |
55 |
56 | blog comments powered by Disqus
57 | <% end %>
58 |
59 |
60 |
63 |
64 |
78 |
79 |
83 |
88 |
89 |
90 |
--------------------------------------------------------------------------------
/content/patterns/unique_items_map_reduce.txt:
--------------------------------------------------------------------------------
1 | ---
2 | title: Counting Unique Items with Map-Reduce
3 | created_at: 2010-05-05 10:05:24.036546 -04:00
4 | recipe: true
5 | author: Kyle Banker
6 | description: How to use map-reduce to count unique items across a category.
7 | filter:
8 | - erb
9 | - markdown
10 | ---
11 |
12 | ### Problem
13 |
14 | You have a collection that stores pageviews by user, and you want to count the total
15 | number of unique user visits per day using map-reduce.
16 |
17 | Each pageview document looks something like this:
18 |
19 | <% code 'javascript' do %>
20 | {
21 | "url" : "http://example.com/photos",
22 | "user_id" : ObjectID('4be1c916e031933119d78b30'),
23 | "date": "Wed May 05 2010 15:37:58 GMT-0400 (EDT)"
24 | }
25 | <% end %>
26 |
27 | The solution requires grouping the pageviews by day and then counting
28 | the total number of user visits and along with the number of unique visits for that day.
29 |
30 | ### Solution
31 |
32 | What's tricky about this situation is that it requires a two-pass map-reduce in order to scale well.
33 | The first pass involves grouping by date and user id. This allows us to group by user and day and
34 | returns, as a side effect, the number of pageviews per user per day.
35 |
36 | #### 1. First Pass
37 |
38 | ##### Map Step
39 |
40 | The only tricky part about the map function is making sure that we emit on the day. Since we're storing
41 | a full date, we need to parse out just the year, month, and date, and then emit on that value:
42 |
43 | <% code 'javascript' do %>
44 | map = function() {
45 | day = Date.UTC(this.date.getFullYear(), this.date.getMonth(), this.date.getDate());
46 |
47 | emit({day: day, user_id: this.user_id}, {count: 1});
48 | }
49 | <% end %>
50 |
51 | If you want a more efficient date calculation, you can use this:
52 |
53 | <% code 'javascript' do %>
54 | day = this.date - (this.date % (24 * 60 * 60 * 1000));
55 | <% end %>
56 |
57 | ##### Reduce Step
58 |
59 | The reduce function is trivial, as it simply performs a count:
60 |
61 |
62 | <% code 'javascript' do %>
63 | reduce = "function(key, values) {
64 | var count = 0;
65 |
66 | values.forEach(function(v) {
67 | count += v['count'];
68 | });
69 |
70 | return {count: count};
71 | }"
72 | <% end %>
73 |
74 | ##### Run the command
75 |
76 | We run the mapReduce command, storing the output in the *pageview_results* collection:
77 |
78 | <% code 'javascript' do %>
79 | db.pageviews.mapReduce(map, reduce, {out: "pageview_results"});
80 | <% end %>
81 |
82 | #### 2. Second Pass
83 |
84 | ##### Map Step
85 |
86 | Now that we have a preliminary set of results, we can do a second pass to count unique users by day.
87 | Here's the map function:
88 |
89 | <% code 'javascript' do %>
90 | map = "function() {
91 | emit(this['_id']['day'], {count: 1});
92 | }"
93 | <% end %>
94 |
95 | Because the first result set will store the *emit* key within an '_id' field, we have to reach into
96 | that object to get the date.
97 |
98 | ##### Reduce Step
99 |
100 | It turns out that the same reduce function will work for the second pass; no need to rewrite.
101 |
102 | ##### Running the command
103 |
104 | Now just run the mapReduce command on the result collection, and output to a new results collection.
105 |
106 | <% code 'javascript' do %>
107 | db.pageview_results.mapReduce(map, reduce, {out: "pageview_results_unique"});
108 | <% end %>
109 |
110 | Since we've specified that the output collection should be called *pageview_results_unique*, we can
111 | query that collection to see the results:
112 |
113 | <% code 'javascript' do %>
114 | db.pageview_results_unique.find();
115 | <% end %>
116 |
117 | That's all there is to it!
118 |
119 | #### 3. Limiting the Operation
120 |
121 | If our pageviews collection spans a long period of time, it might be prudent to run map-reduce over
122 | just a portion of the data. That can be achieved by passing a query selector to the map-reduce command. So,
123 | for instance, if we just wanted results from the past two weeks, we could run:
124 |
125 | <% code 'javascript' do %>
126 | two_weeks_ago = new Date(Date.now() - 60 * 60 * 24 * 14 * 1000);
127 | db.pageviews.mapReduce(map, reduce,
128 | {out: "pageview_results", query: {date: {'$gt': two_weeks_ago}}});
129 | <% end %>
130 |
131 | ### See Also
132 |
133 | * The MongoDB [docs on aggregation][1]
134 | * [Map-Reduce Basics][2] by Kyle Banker
135 | * [MapReduce: the Fanfiction][3] by Kristina Chodorow
136 |
137 | [1]: http://www.mongodb.org/display/DOCS/Aggregation
138 | [2]: http://kylebanker.com/blog/2009/12/mongodb-map-reduce-basics/
139 | [3]: http://www.snailinaturtleneck.com/blog/2010/03/15/mapreduce-the-fanfiction/
140 |
141 |
--------------------------------------------------------------------------------
/content/patterns/random-attribute.txt:
--------------------------------------------------------------------------------
1 | ---
2 | title: The Random Attribute
3 | created_at: 2010-05-12 10:05:24.036546 -04:00
4 | recipe: true
5 | author: Alberto Lerner, Dwight Merriman, and Aaron Staple
6 | description:
7 | filter:
8 | - erb
9 | - markdown
10 | ---
11 |
12 | ### Problem
13 |
14 | Consider a scenario where you'd like to issue a query but would like
15 | to pick a random document in the result.
16 |
17 | <% code 'javascript' do %>
18 | photos.find({"author":"John Doe"})
19 | <% end %>
20 |
21 | Any 'John Doe' would do. But you'd like a different one each time you
22 | run the query. Sure, you can always count the resulting documents and
23 | randomly pick one. But in that case the query would be run in its
24 | entirety and all the results would be transferred to your app.
25 |
26 | Now, consider another scenario where you'd like to run a map/reduce
27 | but would be happy to trade result accuracy for performance. That is,
28 | you'd be happy to use a sample (of a given percentage) of your data.
29 | You don't really know the number of documents involved, but they are
30 | numerous.
31 |
32 |
33 | ### Solution
34 |
35 | We can add a special attribute in each document that we'll call here
36 | the 'random attribute,' RA. The RA needs to be, well, random. If you
37 | use, for instance, a number generated by Math.random() in Javascript,
38 | that would work.
39 |
40 | <% code 'javascript' do %>
41 | > db.docs.drop()
42 | > db.docs.save( { key : 1, ..., random : Math.random() } )
43 | > db.docs.save( { key : 1, ..., random : Math.random() } )
44 | > db.docs.save( { key : 2, ..., random : Math.random() } )
45 | ... many more insertions with 'key : 2' ...
46 | > db.docs.save( { key : 2, ..., random : Math.random() } )
47 | ...
48 | <% end %>
49 |
50 | If you use Math.random(), the random attribute in your documents could
51 | have any value from 0 to 1:
52 |
53 |
54 | <% code 'javascript' do %>
55 | ...
56 | { "_id" : ObjectId("4bfa81198cf5fc1002a42b91"), "key" : 2, "random" : 0.23578915913357468}
57 | ...
58 | { "_id" : ObjectId("4bfa81198cf5fc1002a42b93"), "key" : 2, "random" : 0.8983254666113549 }
59 | ...
60 | <% end %>
61 |
62 |
63 | The solution also requires the RA to be indexed in a certain way. But
64 | let's discuss this using an example.
65 |
66 |
67 |
68 | ### 1. Picking a random document from the result
69 |
70 | If you're just interested in one document, you'd formulate your
71 | original query and add a filter over the RA. The idea here is
72 | that you'd try to find which of the result documents has the closest
73 | RA to a random number you draw at query time.
74 |
75 | The code below shows one way to do it. You'd pick a random number on the
76 | fly -- using the same method you used to populate the RA -- and test
77 | your number against the stored attribute.
78 |
79 |
80 | <% code 'javascript' do %>
81 | > rand = Math.random()
82 | > result = db.docs.findOne( { key : 2, random : { $gte : rand } } )
84 | > if ( result == null ) {
85 | > result = db.docs.findOne( { key : 2, random : { $lte : rand } } )
86 | > }
87 | <% end %>
88 |
89 |
90 | Note that we're not going for equality alone because the chances of
91 | that to occur are low. So we try either '$gte' or '$lte' with equal
92 | probability but knowing that in some cases it may not return a result,
93 | even though there are documents in the result. For that reason, an
94 | empty result must be verified by doing a search in the opposite
95 | direction.
96 |
97 | The final -- but important -- detail about this query is that both the
98 | search criteria and the RA must be indexed together. Please, see the
99 | Caveats sections for further details.
100 |
101 | <% code 'javascript' do %>
102 | > db.docs.ensureIndex( { key : 1, random : 1 } )
103 | <% end %>
104 |
105 |
106 |
107 | ### 2. Map/reduce on a sample of the data
108 |
109 | If your collection is large and the computation you want to run could
110 | operate on a sample instead, ie tolerating a less accurate result, you
111 | can have the mapping phase apply an early filter based on the RA.
112 |
113 | <% code 'javascript' do %>
114 | > db.docs.drop()
115 | > for (i=0; i < 10000; i++) { db.docs.save( { key : i % 10, rand : Math.random() } ) }
116 | > m = function() { emit(this.key, 1); }
117 | > r = function(k, vals) {
118 | var sum=0;
119 | for (var i in vals) sum += vals[i];
120 | return sum;
121 | }
122 | > sample = 0.1
123 | > res = db.docs.mapReduce(m, r, { query : { key : 2, rand : { $lte: sample } } })
124 | <% end %>
125 |
126 | Mongo will issue the query over all the data but the mapper would be
127 | called only for the sampled documents, 10% of them here. In the
128 | example above, the running time should be significantly reduced as
129 | compared with the "full" query. The counter for 'key : 2' with a 10%
130 | sample was 85 when the perfect result would have been 100.
131 |
132 | You could improve accuracy by increasing the sample size, of course.
133 |
134 | Here's a sample-based count for all the keys. This should give you an
135 | idea about the speed/accuracy trade-off.
136 |
137 | <% code 'javascript' do %>
138 | > res = db.docs.mapReduce(m, r, { query : { rand : { $lte : 0.1 } } })
139 | ...
140 | > db[res.result].find()
141 | { "_id" : 0, "value" : 93 }
142 | { "_id" : 1, "value" : 82 }
143 | { "_id" : 2, "value" : 85 }
144 | { "_id" : 3, "value" : 92 }
145 | { "_id" : 4, "value" : 114 }
146 | { "_id" : 5, "value" : 104 }
147 | { "_id" : 6, "value" : 100 }
148 | { "_id" : 7, "value" : 90 }
149 | { "_id" : 8, "value" : 104 }
150 | { "_id" : 9, "value" : 103 }
151 | <% end %>
152 |
153 | In the map/reduce RA case, that attribute doesn't necessarily need to
154 | be indexed as in case 1 above.
155 |
156 |
157 | ### Caveat
158 |
159 | In the simple document case, the query we'd use must be an equality
160 | one. The map-reduce case doesn't require so.
161 |
162 |
163 | The random attribute will work better if the results we're extracting
164 | from have a large number of documents. Consider for instance a query with few results:
165 |
166 | <% code 'javascript' do %>
167 | > db.docs.save( { key : 1, random : Math.random() } )
168 | > db.docs.save( { key : 1, random : Math.random() } )
169 | > db.docs.find()
170 | { "_id" : ObjectId("4bfa9585cffdb770c08e7cc9"), "key" : 1, "random" : 0.9988383572723725 }
171 | { "_id" : ObjectId("4bfa9586cffdb770c08e7cca"), "key" : 1, "random" : 0.8338006548262672 }
172 | <% end %>
173 |
174 |
175 | The RA cannot be considered to be uniformly distributed between 0 and
176 | 1 for that key. The net effect is that some documents from the result
177 | would appear much more often than others when a random document matching the
178 | search criteria 'key : 1' is requested.
179 |
--------------------------------------------------------------------------------
/content/patterns/finding_max_and_min.txt:
--------------------------------------------------------------------------------
1 | ---
2 | title: Finding Max And Min Values with Versioned Documents
3 | created_at: 2010-05-24 20:16:24.036546 -05:00
4 | recipe: true
5 | author: Amos King
6 | description: How to use MapReduce to find max/min values for a given set and property
7 | filter:
8 | - erb
9 | - markdown
10 | ---
11 |
12 | ### Problem
13 |
14 | You want to list the latest version numbers of a set of documents. Each document contains
15 | a field that represents the version of the document and a field representing which document
16 | that is a version of:
17 |
18 | <% code 'javascript' do %>
19 | {
20 | "document_id" : "mongoDB How-To",
21 | "author" : "Amos King",
22 | "content" : "...",
23 | "version" : 1.0
24 | }
25 | <% end %>
26 |
27 | We want to end up with a collection of document_ids and their largest version number:
28 |
29 | <% code 'javascript' do %>
30 | {"_id" : "mongoDB How To", "value" : 1.1}
31 | {"_id" : "Resume", "value" : 6}
32 | {"_id" : "Schema", "value" : 1}
33 | <% end %>
34 |
35 | ### Solution
36 |
37 | Use the `mapreduce` database command. Emit each document_id and version in the map function,
38 | then use the reduce function to find the max version.
39 |
40 | #### 1. Map
41 |
42 | The map function is very simple. We use our common element between all versions as the key and the version as the value:
43 |
44 | <% code 'javascript' do %>
45 | map = function () {
46 | emit(this.document_id, this.version);
47 | }
48 | <% end %>
49 |
50 | #### 2. Reduce
51 |
52 | The reduce function is also very simple but has a little bit of javascript magic.
53 | Math.max normally takes in any number of arguments (i.e. Math.max(1,2,3)), but we
54 | need to call it with an array. So we call Math.max with apply so that we can pass
55 | in an array of values to max. The apply breaks the array into individual arguments
56 | to pass to Math.max. The first argument to apply is the context in which we want to
57 | run; Math will do fine here.
58 |
59 | <% code 'javascript' do %>
60 | reduce = function (key, values) {
61 | return Math.max.apply(Math, values);
62 | }
63 | <% end %>
64 |
65 | Finding the minimum value is as easy as replacing Math.max with Math.min.
66 |
67 | #### 3. Call the `mapreduce` command
68 |
69 | Now it's time to get our result set. We'll set the output collection name parameter to 'newest_versions' so that we'll have an appropriately named set to work with:
70 |
71 | <% code 'javascript' do %>
72 | > result = db.runCommand({
73 | ... "mapreduce" : "documents",
74 | ... "map" : map,
75 | ... "reduce" : reduce,
76 | ... "out" : "newest_versions"})
77 | <% end %>
78 |
79 | Now, we query the 'newest_versions' collection. Each document is exactly what we're looking for:
80 |
81 | <% code 'javascript' do %>
82 | > db.newest_versions.find()
83 | {"_id" : "mongoDB How To", "value" : 1.1}
84 | {"_id" : "Resume", "value" : 6}
85 | {"_id" : "Schema", "value" : 1}
86 | <% end %>
87 |
88 | ### Extras
89 |
90 | The Map and Reduce Functions can be rewritten slightly to return the Maximum and Minimum versions of each document.
91 |
92 | For the purpose of this example, the input collection is as follows: (The _id values have been truncated for brevity.)
93 |
94 | <% code 'javascript' do %>
95 | > db.documents.find()
96 | { "_id" : 1, "document_id" : "mongoDB How-To", "author" : "Amos King", "content" : "...", "version" : 1 }
97 | { "_id" : 2, "document_id" : "mongoDB How-To", "author" : "Amos King", "content" : "...", "version" : 1.1 }
98 | { "_id" : 3, "document_id" : "Resume", "author" : "Author", "content" : "...", "version" : 6 }
99 | { "_id" : 4, "document_id" : "Schema", "author" : "Someone Else", "content" : "...", "version" : 0.9 }
100 | { "_id" : 5, "document_id" : "Schema", "author" : "Someone Else", "content" : "...", "version" : 1 }
101 | >
102 | <% end %>
103 |
104 | #### Map
105 |
106 | The new Map function emits documents containing the document_id, and "value" key containing a list of embedded documents, each containing the keys, "max" and "min". Both keys are initially set to be equal to the "version" key of the current document. Because there is only one document containing the "document_id" : "Resume", this output will not need to be reduced.
107 |
108 | <% code 'javascript' do %>
109 | map = function () {
110 | emit(this.document_id, {max:this.version, min:this.version});
111 | }
112 | <% end %>
113 |
114 | The Map function will emit something that looks like the following:
115 |
116 | <% code 'javascript' do %>
117 | "mongoDB How-To", { "max" : 1, "min" : 1 }
118 | "mongoDB How-To", { "max" : 1.1, "min" : 1.1 }
119 | "Resume", { "max" : 6, "min" : 6 }
120 | "Schema", { "max" : 0.9, "min" : 0.9 }
121 | "Schema", { "max" : 1, "min" : 1 }
122 | <% end %>
123 |
124 | #### Reduce
125 |
126 | Next the Reduce function will be run to compress the data emitted by the Map function. The Reduce function requires an input of an id, and a list of values. It must output an id and a single value, which in this case is a document containing the keys, "max" and "min". The reduce function will interpret the data that has been emitted from the Map function as follows:
127 |
128 | <% code 'javascript' do %>
129 | "mongoDB How-To", [{ "max" : 1, "min" : 1 }, { "max" : 1.1, "min" : 1.1 }]
130 | "Schema", [{ "max" : 0.9, "min" : 0.9 }, { "max" : 1, "min" : 1 }]
131 | <% end %>
132 |
133 | The Reduce function will be run repeatedly, passing its previous output value as the new input, until the output list contains only one value.
134 |
135 | Notice that the id "Resume" is not passed to the Reduce function, because it only has one value associated with it. This reduce function will find the maximum "max" value, and the minimum "min" value for each key. It will be run twice; once for the id "Schema", and once for the id "mongoDB How-To".
136 |
137 | <% code 'javascript' do %>
138 | reduce = function (key, values) {
139 | max = values[0].max;
140 | min = values[0].min;
141 | if (values.length > 1){
142 | for(i in values){
143 | if(values[i].max > max){
144 | max = values[i].max;
145 | };
146 | if(values[i].min < min){
147 | min = values[i].min;
148 | };
149 | };
150 | };
151 | return {"max":max, "min":min};
152 | }
153 |
154 | <% end %>
155 |
156 | Running mapreduce will return the following:
157 |
158 | <% code 'javascript' do %>
159 | > result = db.runCommand({"mapreduce" : "documents","map" : map,"reduce" : reduce,"out" : "newest_versions"})
160 | {
161 | "result" : "newest_versions",
162 | "timeMillis" : 2,
163 | "counts" : {
164 | "input" : 5,
165 | "emit" : 5,
166 | "reduce" : 2,
167 | "output" : 3
168 | },
169 | "ok" : 1
170 | }
171 | > db.newest_versions.find()
172 | { "_id" : "Resume", "value" : { "max" : 6, "min" : 6 } }
173 | { "_id" : "Schema", "value" : { "max" : 1, "min" : 0.9 } }
174 | { "_id" : "mongoDB How-To", "value" : { "max" : 1.1, "min" : 1 } }
175 | <% end %>
176 |
177 | ### See Also
178 |
179 | * The MongoDB [docs on mapreduce][1]
180 |
181 | [1]: http://www.mongodb.org/display/DOCS/MapReduce
182 |
--------------------------------------------------------------------------------
/content/patterns/perform-two-phase-commits.txt:
--------------------------------------------------------------------------------
1 | ---
2 | title: Perform Two Phase Commits
3 | created_at: 2011-12-02
4 | recipe: true
5 | author: Antoine Girbal
6 | description: Perform Two-Phase Commits with MongoDB
7 | filter:
8 | - erb
9 | - markdown
10 | ---
11 |
12 | A common problem with non-relational database is that it is not
13 | possible to do transactions across several documents. When executing a
14 | transaction composed of several sequential operations, some issues
15 | arise:
16 |
17 | - Atomicity: it is difficult to rollback changes by previous
18 | operations if one fails.
19 |
20 | - Isolation: changes to a single document are seen by concurrent
21 | processes, which may have an inconsistent view of the data during
22 | the transaction execution.
23 |
24 | - Consistency: In case of a major failure (network, hardware) it is
25 | possible that the data will be left inconsistent and difficult to
26 | repair.
27 |
28 | MongoDB provides atomicity for an operation on a single
29 | document. Since documents can be fairly complex, this actually covers
30 | many more cases than with a traditional DB. Still there are cases
31 | where transactions across documents are needed, and that is when a
32 | two-phase commit can be used. The two-phase commit is made possible by
33 | the fact that documents are complex and can represent pending data and
34 | states. This process makes sure that the data is eventually
35 | consistent, which is usually what matters most to the system.
36 |
37 | ## Account transfer example
38 |
39 | ## Problem overview
40 |
41 | The most common example of transaction is to transfer funds from
42 | account A to B in a reliable way. With a traditional RDBMS, funds are
43 | subtracted from A and added to B within an atomic transaction. With
44 | MongoDB, a viable solution is to use a two-phase commit.
45 |
46 | Let's have one collection holding accounts:
47 |
48 | <% code 'javascript' do %>
49 | foo:PRIMARY> db.accounts.save({name: "A", balance: 1000, pendingTransactions: []})
50 | foo:PRIMARY> db.accounts.save({name: "B", balance: 1000, pendingTransactions: []})
51 | foo:PRIMARY> db.accounts.find()
52 | { "_id" : ObjectId("4d7bc66cb8a04f512696151f"), "name" : "A", "balance" : 1000, "pendingTransactions" : [ ] }
53 | { "_id" : ObjectId("4d7bc67bb8a04f5126961520"), "name" : "B", "balance" : 1000, "pendingTransactions" : [ ] }
54 | <% end %>
55 |
56 | And we need one collection representing transactions:
57 |
58 | <% code 'javascript' do %>
59 | foo:PRIMARY> db.transactions.save({source: "A", destination: "B", value: 100, state: "initial"})
60 | foo:PRIMARY> db.transactions.find()
61 | { "_id" : ObjectId("4d7bc7a8b8a04f5126961522"), "source" : "A", "destination" : "B", "value" : 100, "state" : "initial" }
62 | <% end %>
59 |
60 |
61 | ### Transaction description
62 |
63 | **Step 1:** the transaction state is switched to "pending":
64 |
65 | <% code 'javascript' do %>
66 | foo:PRIMARY> t = db.transactions.findOne({state: "initial"})
67 | {
68 | "_id" : ObjectId("4d7bc7a8b8a04f5126961522"),
69 | "source" : "A",
70 | "destination" : "B",
71 | "value" : 100,
72 | "state" : "initial"
73 | }
74 | foo:PRIMARY> db.transactions.update({_id: t._id}, {$set: {state: "pending"}})
75 | foo:PRIMARY> db.transactions.find()
76 | { "_id" : ObjectId("4d7bc7a8b8a04f5126961522"), "source" : "A", "destination" : "B", "value" : 100, "state" : "pending" }
77 | <% end %>
78 |
79 | **Step 2:** apply the transaction to both accounts, and make sure the
80 | transaction is not already pending:
81 |
82 | <% code 'javascript' do %>
83 | foo:PRIMARY> db.accounts.update({name: t.source, pendingTransactions: {$ne: t._id}}, {$inc: {balance: -t.value}, $push: {pendingTransactions: t._id}})
84 | foo:PRIMARY> db.accounts.update({name: t.destination, pendingTransactions: {$ne: t._id}}, {$inc: {balance: t.value}, $push: {pendingTransactions: t._id}})
85 | foo:PRIMARY> db.accounts.find()
86 | { "_id" : ObjectId("4d7bc97fb8a04f5126961523"), "balance" : 900, "name" : "A", "pendingTransactions" : [ ObjectId("4d7bc7a8b8a04f5126961522") ] }
87 | { "_id" : ObjectId("4d7bc984b8a04f5126961524"), "balance" : 1100, "name" : "B", "pendingTransactions" : [ ObjectId("4d7bc7a8b8a04f5126961522") ] }
88 | <% end %>
89 |
90 | **Step 3:** set the transaction's state to "committed":
91 |
92 | <% code 'javascript' do %>
93 | foo:PRIMARY> db.transactions.update({_id: t._id}, {$set: {state: "committed"}})
94 | foo:PRIMARY> db.transactions.find()
95 | { "_id" : ObjectId("4d7bc7a8b8a04f5126961522"), "destination" : "B", "source" : "A", "state" : "committed", "value" : 100 }
96 | <% end %>
97 |
98 | **Step 4:** remove the pending transaction from accounts:
99 |
100 | <% code 'javascript' do %>
101 | foo:PRIMARY> db.accounts.update({name: t.source}, {$pull: {pendingTransactions: ObjectId("4d7bc7a8b8a04f5126961522")}})
102 | foo:PRIMARY> db.accounts.update({name: t.destination}, {$pull: {pendingTransactions: ObjectId("4d7bc7a8b8a04f5126961522")}})
103 | foo:PRIMARY> db.accounts.find()
104 | { "_id" : ObjectId("4d7bc97fb8a04f5126961523"), "balance" : 900, "name" : "A", "pendingTransactions" : [ ] }
105 | { "_id" : ObjectId("4d7bc984b8a04f5126961524"), "balance" : 1100, "name" : "B", "pendingTransactions" : [ ] }
106 | <% end %>
107 |
108 | **Step 5:** set transaction's state to "done":
109 |
110 | <% code 'javascript' do %>
111 | foo:PRIMARY> db.transactions.update({_id: t._id}, {$set: {state: "done"}})
112 | foo:PRIMARY> db.transactions.find()
113 | { "_id" : ObjectId("4d7bc7a8b8a04f5126961522"), "destination" : "B", "source" : "A", "state" : "done", "value" : 100 }
114 | <% end %>
115 |
116 | ### Failure scenarios
117 |
118 | Now let's look at the failure scenarios and how to deal with them. For
119 | example, a failure can be that the application making the sequential
120 | operations suddenly dies, and is restarted.
121 |
122 | Cases to cover:
123 |
124 | - any failure after step 1 and before step 3: Application
125 | should get a list of transactions in state "pending" and resume from
126 | step 2.
127 |
128 | - any failure after step 3 and before step 5: Application should get a
129 | list of transactions in state "committed" and resume from step 4.
130 |
131 | Application is thus always able to resume the transaction and
132 | eventually get to a consistent state. These "repair" jobs should be
133 | run at application startup and possibly at regular interval to catch
134 | any unfinished transaction. The time it takes to get to a consistent
135 | state may vary depending on how long it takes to resume a failed
136 | transaction.
137 |
138 | ### Rollback
139 |
140 | A common need may be to rollback a transaction, either because it has
141 | been cancelled or because it can never succeed (e.g. account B is
142 | closed).
143 |
144 | Two cases:
145 |
146 | - after step 3, the transaction is considered committed and should not
147 | be rolled back. Instead, to undo the transaction, a new transaction
148 | can be created with an opposite source and destination.
149 |
150 | - after step 1 and before step 3: the process below should be applied.
151 |
152 | **Step 1:** set the transaction's state to "canceling":
153 |
154 | <% code 'javascript' do %>
155 | foo:PRIMARY> db.transactions.update({_id: t._id}, {$set: {state: "canceling"}})
156 | <% end %>
157 |
158 | **Step 2:** undo the transaction from accounts:
159 |
160 | <% code 'javascript' do %>
161 | foo:PRIMARY> db.accounts.update({name: t.source, pendingTransactions: t._id}, {$inc: {balance: t.value}, $pull: {pendingTransactions: t._id}})
162 | foo:PRIMARY> db.accounts.update({name: t.destination, pendingTransactions: t._id}, {$inc: {balance: -t.value}, $pull: {pendingTransactions: t._id}})
163 | foo:PRIMARY> db.accounts.find()
164 | { "_id" : ObjectId("4d7bc97fb8a04f5126961523"), "balance" : 1000, "name" : "A", "pendingTransactions" : [ ] }
165 | { "_id" : ObjectId("4d7bc984b8a04f5126961524"), "balance" : 1000, "name" : "B", "pendingTransactions" : [ ] }
166 | <% end %>
167 |
168 | **Step 3:** set the transaction's state to "cancelled":
169 |
170 | <% code 'javascript' do %>
171 | foo:PRIMARY> db.transactions.update({_id: t._id}, {$set: {state: "cancelled"}})
172 | <% end %>
173 |
174 | ### Multiple applications
175 |
176 | A common issue that exists with any DBs is how to make it safe for
177 | several applications to run transactions. It is important that only 1
178 | application handles a given transaction at one point in time, because
179 | otherwise conflicts can happen.
180 |
181 | One example is:
182 |
183 | - application A1 and A2 both grab transaction T1 which is in "initial"
184 | state.
185 |
186 | - A1 applies the whole transaction before A2 starts
187 |
188 | - A2 applies transaction a 2nd time because it does not appear as
189 | pending in the accounts
190 |
191 | To handle multiple applications, there should be a marker at the
192 | transaction level that the transaction is being handled. One can use
193 | findAndModify:
194 |
195 | <% code 'javascript' do %>
196 | foo:PRIMARY> t = db.transactions.findAndModify({query: {state: "initial", application: {$exists: 0}}, update: {$set: {state: "pending", application: "A1"}}, new: true})
197 | {
198 | "_id" : ObjectId("4d7be8af2c10315c0847fc85"),
199 | "application" : "A1",
200 | "destination" : "B",
201 | "source" : "A",
202 | "state" : "pending",
203 | "value" : 150
204 | }
205 | <% end %>
206 |
207 | The only remaining issue is if application A1 dies during transaction
208 | execution. The resume processes described in "Failure scenarios" can be
209 | applied, but application should make sure it owns the
210 | transactions. For example to resume pending jobs, query should be:
211 |
212 | <% code 'javascript' do %>
213 | foo:PRIMARY> db.transactions.find({application: "A1", state: "pending"})
214 | { "_id" : ObjectId("4d7be8af2c10315c0847fc85"), "application" : "A1", "destination" : "B", "source" : "A", "state" : "pending", "value" : 150 }
215 | <% end %>
216 |
217 | ### Proper two-phase commit
218 |
219 | This implementation tries to be simple on purpose, it assumes that:
220 |
221 | - an account operation can always be rolled back
222 |
223 | - the account balance can go negative
224 |
225 | A proper real world implementation would probably differ:
226 |
227 | - accounts have both a current balance, pending credits, pending
228 | debits.
229 |
230 | - during step 2, application makes sure accounts has sufficient funds
231 | for transaction, modifies credits/debits and adds transaction as
232 | pending, all in one update.
233 |
234 | - during step 4, application actually applies the transaction on
235 | balance, modifies credits/debits and removes transaction from
236 | pending, all in one update.
237 |
238 | ### Additional notes:
239 |
240 | In the context of important transactions, you will probably want to use:
241 |
242 | - reasonable "getLastError" to check that operations are actually
243 | written to the DB (see "getLastError" or "write concern" for your
244 | drivers).
245 |
246 | - durability so that operations are consistently saved on disk when an
247 | operation returns successfully.
248 |
--------------------------------------------------------------------------------
/content/operations/convert-replica-set-to-replicated-shard-cluster.txt:
--------------------------------------------------------------------------------
1 | ---
2 | title: Converting a Replica Set to a Replicated Shard Cluster
3 | created_at: 2011-11-30
4 | recipe: true
5 | author: Marc Bastien
6 | filter:
7 | - erb
8 | - markdown
9 | ---
10 |
11 | ## Overview
12 |
13 | This tutorial documents the process for converting a single 3-member
14 | replica set to a shard cluster that consists of 2 shards. Each shard
15 | will consist of an independent 3-member replica set.
16 |
17 | The procedure that follows uses a test environment running on a local
18 | system (i.e. localhost) and has been tested. You should feel
19 | encouraged to "follow along at home." In a production environment or
20 | one with multiple systems, use the same process except where noted.
21 |
22 | In brief, the process is as follows:
23 |
24 | 1. Create or select an existing 3-member replica set, and insert
25 | some data into a collection.
26 |
27 | 2. Start the config servers and create a shard cluster with a single
28 | shard.
29 |
30 | 3. Create a second replica set with three new `mongod` processes.
31 |
32 | 4. Add the second replica set to the sharded cluster.
33 |
34 | 5. Enable sharding on the desired collection or collections.
35 |
36 | ## Process
37 |
38 | ### 1. Set up a Three Member Replica Set and Insert Test Data
39 |
40 | #### 1.1. Create Directories for First Replica Set Instance
41 |
42 | Create the following data directories for the members of the
43 | first replica set, named firstset:
44 |
45 | - `/data/example/firstset1`
46 | - `/data/example/firstset2`
47 | - `/data/example/firstset3`
48 |
49 | #### 1.2. Start Three `mongod` instances
50 |
51 | Run each command in a separate terminal window or GNU Screen window.
52 |
53 | <% code 'javascript' do %>
54 | $ bin/mongod --dbpath /data/example/firstset1 --port 10001 --replSet firstset --oplogSize 700 --rest
55 | <% end %>
56 |
57 | <% code 'javascript' do %>
58 | $ bin/mongod --dbpath /data/example/firstset2 --port 10002 --replSet firstset --oplogSize 700 --rest
59 | <% end %>
60 |
61 | <% code 'javascript' do %>
62 | $ bin/mongod --dbpath /data/example/firstset3 --port 10003 --replSet firstset --oplogSize 700 --rest
63 | <% end %>
64 |
65 | **Note:** Here, the "`--oplogSize 700`" option restricts the size of
66 | the operation log (i.e. oplog) for each `mongod` process to
67 | 700MB. Without the `--oplogSize` option, each `mongod` will reserve
68 | approximately 5% of the free disk space on the volume. By limiting the
69 | size of the oplog, each process will start more quickly. Omit this setting
70 | in production environments.
71 |
72 | #### 1.3 Connect to One MongoDB Instance with `mongo` shell
73 |
74 | Run the following command in a new terminal to connect to a node.
75 |
76 | <% code 'javascript' do %>
77 | $ bin/mongo localhost:10001/admin
78 | MongoDB shell version: 2.0.2-rc1
79 | connecting to: localhost:10001/admin
80 | >
81 | <% end %>
82 |
83 | **Note:** Above and hereafter, if you are running in a production
84 | environment or are testing this process with `mongod` instances on
85 | multiple systems replace "localhost" with a resolvable domain,
86 | hostname, or the IP address of your system.
87 |
88 | #### 1.4. Initialize the First Replica Set
89 |
90 | <% code 'javascript' do %>
91 | > db.runCommand({"replSetInitiate" : {"_id" : "firstset", "members" : [{"_id" : 1, "host" : "localhost:10001"}, {"_id" : 2, "host" : "localhost:10002"}, {"_id" : 3, "host" : "localhost:10003"}]}})
92 | {
93 | "info" : "Config now saved locally. Should come online in about a minute.",
94 | "ok" : 1
95 | }
96 | <% end %>
97 |
98 | #### 1.5. Create and Populate a New Collection
99 |
100 | The following JavaScript writes one million documents to the
101 | collection "`test_collection`" in the following form:
102 |
103 | <% code 'javascript' do %>
104 | { "_id" : ObjectId("4ed5420b8fc1dd1df5886f70"), "name" : "Greg", "user_id" : 4, "boolean" : true, "added_at" : ISODate("2011-11-29T20:35:23.121Z"), "number" : 74 }
105 | <% end %>
106 |
107 | Use the following sequence of operations from the `mongo` prompt.
108 |
109 | <% code 'javascript' do %>
110 | PRIMARY> use test
111 | switched to db test
112 | PRIMARY> people = ["Marc", "Bill", "George", "Eliot", "Matt", "Trey", "Tracy", "Greg", "Steve", "Kristina", "Katie", "Jeff"];
113 | PRIMARY> for(var i=0; i<1000000; i++){
114 | name = people[Math.floor(Math.random()*people.length)];
115 | user_id = i;
116 | boolean = [true, false][Math.floor(Math.random()*2)];
117 | added_at = new Date();
118 | number = Math.floor(Math.random()*10001);
119 | db.test_collection.save({"name":name, "user_id":user_id, "boolean": boolean, "added_at":added_at, "number":number });
120 | }
121 | <% end %>
122 |
123 | Creating and fully replicating one million documents in the `mongo`
124 | shell may take several minutes depending on your system.
125 |
126 | ### 2. Start the "config" Instances and Create a Cluster with a Single Shard
127 |
128 | **Note:** For development and testing environments, a single config
129 | server is sufficient; in production environments, use three config
130 | servers. Because config instances only store the *metadata* for the
131 | shard cluster, they have minimal resource requirements.
132 |
133 | These instructions specify creating three config servers.
134 |
135 | #### 2.1. Create Directories for Config Instances
136 |
137 | Create the following data directories for each of the config
138 | instances:
139 |
140 | - `/data/example/config1`
141 | - `/data/example/config2`
142 | - `/data/example/config3`
143 |
144 | #### 2.2. Start the config Servers
145 |
146 | Run each command in a separate terminal window or GNU Screen window.
147 |
148 | <% code 'javascript' do %>
149 | $ bin/mongod --configsvr --dbpath /data/example/config1 --port 20001
150 | <% end %>
151 |
152 | <% code 'javascript' do %>
153 | $ bin/mongod --configsvr --dbpath /data/example/config2 --port 20002
154 | <% end %>
155 |
156 | <% code 'javascript' do %>
157 | $ bin/mongod --configsvr --dbpath /data/example/config3 --port 20003
158 | <% end %>
159 |
160 | #### 2.3. Start `mongos`
161 |
162 | Run the following command to start a `mongos` instance. Run this
163 | command in a new terminal window or GNU Screen window.
164 |
165 | <% code 'javascript' do %>
166 | $ bin/mongos --configdb localhost:20001,localhost:20002,localhost:20003 --port 27017 --chunkSize 1
167 | <% end %>
168 |
169 | **Note:** If you are using the collection created earlier, or are
170 | just experimenting with sharding, you can use a small `--chunkSize`
171 | (1MB works well). The default chunkSize of 64MB means that your cluster
172 | will need to have 64MB of data before MongoDB's automatic sharding
173 | begins working. In production environments, do not use a small shard
174 | size.
175 |
176 | The `configdb` option specifies the *configuration servers*
177 | (e.g. `localhost:20001`, `localhost:20002`, and `localhost:20003`). The
178 | `mongos` process runs on the default "MongoDB" port (i.e. `27017`),
179 | while the databases themselves, in this example, are running on ports in the
180 | `10001` series. In the above example, since `27017` is the default
181 | port, the option "`--port 27017`" may be omitted. It is included here
182 | only as an example.
183 |
184 | #### 2.4. Add the first shard in `mongos`
185 |
186 | In a new terminal window or GNU Screen session, add the first
187 | shard, using the following procedure:
188 |
189 | <% code 'javascript' do %>
190 | $ bin/mongo localhost:27017/admin
191 | MongoDB shell version: 2.0.2-rc1
192 | connecting to: localhost:27017/admin
193 | mongos> db.runCommand( { addshard : "firstset/localhost:10001,localhost:10002,localhost:10003" } )
194 | { "shardAdded" : "firstset", "ok" : 1 }
195 | mongos>
196 | <% end %>
197 |
198 | ### 3. Create a second replica set with three new mongod processes
199 |
200 | #### 3.1. Create Directories for Second Replica Set Instance
201 |
202 | Create the following data directories for the members of the
203 | second replica set, named secondset:
204 |
205 | - `/data/example/secondset1`
206 | - `/data/example/secondset2`
207 | - `/data/example/secondset3`
208 |
209 | #### 3.2. Start three instances of mongod in three new terminal windows
210 |
211 | <% code 'javascript' do %>
212 | $ bin/mongod --dbpath /data/example/secondset1 --port 10004 --replSet secondset --oplogSize 700 --rest
213 | <% end %>
214 |
215 | <% code 'javascript' do %>
216 | $ bin/mongod --dbpath /data/example/secondset2 --port 10005 --replSet secondset --oplogSize 700 --rest
217 | <% end %>
218 |
219 | <% code 'javascript' do %>
220 | $ bin/mongod --dbpath /data/example/secondset3 --port 10006 --replSet secondset --oplogSize 700 --rest
221 | <% end %>
222 |
223 | NOTE: As in 1.2, this set uses the smaller `oplogSize`
224 | configuration. Omit this setting in production environments.
225 |
226 | #### 3.3. Connect to One MongoDB Instance with `mongo` shell
227 |
228 | <% code 'javascript' do %>
229 | $ bin/mongo localhost:10004/admin
230 | MongoDB shell version: 2.0.2-rc1
231 | connecting to: localhost:10004/admin
232 | >
233 | <% end %>
234 |
235 | #### 3.4. Initialize the Second Replica Set
236 |
237 | <% code 'javascript' do %>
238 | > db.runCommand({"replSetInitiate" : {"_id" : "secondset", "members" : [{"_id" : 1, "host" : "localhost:10004"}, {"_id" : 2, "host" : "localhost:10005"}, {"_id" : 3, "host" : "localhost:10006"}]}})
239 | {
240 | "info" : "Config now saved locally. Should come online in about a minute.",
241 | "ok" : 1
242 | }
243 | <% end %>
244 |
245 | ### 4. Add the Second Replica Set to the Shard Cluster
246 |
247 | In a connection to the `mongos` instance (created above), follow the
248 | below procedure.
249 |
250 | <% code 'javascript' do %>
251 | mongos> use admin
252 | switched to db admin
253 | mongos> db.runCommand( { addshard : "secondset/localhost:10004,localhost:10005,localhost:10006" } )
254 | { "shardAdded" : "secondset", "ok" : 1 }
255 | <% end %>
256 |
257 | You can verify that both shards are properly configured by running the
258 | `listshards` command. View this and example output below:
259 |
260 | <% code 'javascript' do %>
261 | mongos> db.runCommand({listshards:1})
262 | {
263 | "shards" : [
264 | {
265 | "_id" : "firstset",
266 | "host" : "firstset/localhost:10001,localhost:10003,localhost:10002"
267 | },
268 | {
269 | "_id" : "secondset",
270 | "host" : "secondset/localhost:10004,localhost:10006,localhost:10005"
271 | }
272 | ],
273 | "ok" : 1
274 | }
275 | <% end %>
276 |
277 | ### 5. Enable Sharding
278 |
279 | Sharding in MongoDB must be enabled on *both* the database and
280 | collection levels.
281 |
282 | #### 5.1. Enabling Sharding on the Database Level
283 |
284 | Issue the `enablesharding` command. The "`test`" argument specifies
285 | the name of the database. See the following example:
286 |
287 | <% code 'javascript' do %>
288 | mongos> db.runCommand( { enablesharding : "test" } )
289 | { "ok" : 1 }
290 | <% end %>
291 |
292 | #### 5.2. Create an Index on the Shard Key
293 |
294 | Create an index on the shard key. The shard key is used by MongoDB to
295 | distribute documents between shards. Once selected the shard key
296 | cannot be changed. Good shard keys:
297 |
298 | - will have values that are evenly distributed among all documents,
299 | - group documents that are likely to be accessed at the same time in
300 | contiguous chunks, and
301 | - allow for effective distribution of activity among shards.
302 |
303 | Typically shard keys are compound, comprising some sort of hash and
304 | some sort of other primary key. Selecting a shard key depends on your
305 | data set, application architecture, and usage pattern, and is beyond
306 | the scope of this document. For the purposes of this example, we will
307 | shard the "number" key in the data inserted above. This would
308 | typically not be a good shard key for production deployments.
309 |
310 | Create the index with the following procedure:
311 |
312 | <% code 'javascript' do %>
313 | mongos> use test
314 | switched to db test
315 | mongos> db.test_collection.ensureIndex({number:1})
316 | <% end %>
317 |
318 | #### 5.3. Shard the Collection
319 |
320 | Issue the following command to shard the collection:
321 |
322 | <% code 'javascript' do %>
323 | mongos> use admin
324 | switched to db admin
325 | mongos> db.runCommand( { shardcollection : "test.test_collection", key : {"number":1} })
326 | { "collectionsharded" : "test.test_collection", "ok" : 1 }
327 | mongos>
328 | <% end %>
329 |
330 | The collection "`test_collection`" is now sharded!
331 |
332 | Over the next few minutes the Balancer will begin to redistribute
333 | chunks of documents. You can confirm this activity by switching to the
334 | `test` database and running `db.stats()` or `db.printShardingStatus()`.
335 |
336 | Additional documents that are added to this collection will be distributed evenly between the shards.
337 |
338 | See the following examples:
339 |
340 | <% code 'javascript' do %>
341 | mongos> use test
342 | switched to db test
343 | mongos> db.stats()
344 | {
345 | "raw" : {
346 | "firstset/localhost:10001,localhost:10003,localhost:10002" : {
347 | "db" : "test",
348 | "collections" : 3,
349 | "objects" : 973887,
350 | "avgObjSize" : 100.33173458522396,
351 | "dataSize" : 97711772,
352 | "storageSize" : 141258752,
353 | "numExtents" : 15,
354 | "indexes" : 2,
355 | "indexSize" : 56978544,
356 | "fileSize" : 1006632960,
357 | "nsSizeMB" : 16,
358 | "ok" : 1
359 | },
360 | "secondset/localhost:10004,localhost:10006,localhost:10005" : {
361 | "db" : "test",
362 | "collections" : 3,
363 | "objects" : 26125,
364 | "avgObjSize" : 100.33286124401914,
365 | "dataSize" : 2621196,
366 | "storageSize" : 11194368,
367 | "numExtents" : 8,
368 | "indexes" : 2,
369 | "indexSize" : 2093056,
370 | "fileSize" : 201326592,
371 | "nsSizeMB" : 16,
372 | "ok" : 1
373 | }
374 | },
375 | "objects" : 1000012,
376 | "avgObjSize" : 100.33176401883178,
377 | "dataSize" : 100332968,
378 | "storageSize" : 152453120,
379 | "numExtents" : 23,
380 | "indexes" : 4,
381 | "indexSize" : 59071600,
382 | "fileSize" : 1207959552,
383 | "ok" : 1
384 | }
385 | mongos> db.printShardingStatus()
386 | --- Sharding Status ---
387 | sharding version: { "_id" : 1, "version" : 3 }
388 | shards:
389 | { "_id" : "firstset", "host" : "firstset/localhost:10001,localhost:10003,localhost:10002" }
390 | { "_id" : "secondset", "host" : "secondset/localhost:10004,localhost:10006,localhost:10005" }
391 | databases:
392 | { "_id" : "admin", "partitioned" : false, "primary" : "config" }
393 | { "_id" : "test", "partitioned" : true, "primary" : "firstset" }
394 | test.test_collection chunks:
395 | secondset 5
396 | firstset 186
397 | too many chunks to print, use verbose if you want to force print
398 |
399 | mongos> db.stats()
400 | {
401 | "raw" : {
402 | "firstset/localhost:10001,localhost:10003,localhost:10002" : {
403 | "db" : "test",
404 | "collections" : 3,
405 | "objects" : 910960,
406 | "avgObjSize" : 100.33197066830596,
407 | "dataSize" : 91398412,
408 | "storageSize" : 141258752,
409 | "numExtents" : 15,
410 | "indexes" : 2,
411 | "indexSize" : 55400576,
412 | "fileSize" : 1006632960,
413 | "nsSizeMB" : 16,
414 | "ok" : 1
415 | },
416 | "secondset/localhost:10004,localhost:10006,localhost:10005" : {
417 | "db" : "test",
418 | "collections" : 3,
419 | "objects" : 89052,
420 | "avgObjSize" : 100.32942550419979,
421 | "dataSize" : 8934536,
422 | "storageSize" : 11194368,
423 | "numExtents" : 8,
424 | "indexes" : 2,
425 | "indexSize" : 7178528,
426 | "fileSize" : 201326592,
427 | "nsSizeMB" : 16,
428 | "ok" : 1
429 | }
430 | },
431 | "objects" : 1000012,
432 | "avgObjSize" : 100.33174401907178,
433 | "dataSize" : 100332948,
434 | "storageSize" : 152453120,
435 | "numExtents" : 23,
436 | "indexes" : 4,
437 | "indexSize" : 62579104,
438 | "fileSize" : 1207959552,
439 | "ok" : 1
440 | }
441 | mongos> db.printShardingStatus()
442 | --- Sharding Status ---
443 | sharding version: { "_id" : 1, "version" : 3 }
444 | shards:
445 | { "_id" : "firstset", "host" : "firstset/localhost:10001,localhost:10003,localhost:10002" }
446 | { "_id" : "secondset", "host" : "secondset/localhost:10004,localhost:10006,localhost:10005" }
447 | databases:
448 | { "_id" : "admin", "partitioned" : false, "primary" : "config" }
449 | { "_id" : "test", "partitioned" : true, "primary" : "secondset" }
450 | test.test_collection chunks:
451 | secondset 17
452 | firstset 174
453 | too many chunks to print, use verbose if you want to force print
454 | mongos>
455 | <% end %>
456 |
457 | The above demonstrates that chunks are migrated to the shard on
458 | replica set "secondset" over time.
459 |
--------------------------------------------------------------------------------