"]
12 | loop do
13 | page = @pages.parent_of(page)
14 | break if page.nil?
15 | list << "
#{link_to_page(page)}
"
16 | end
17 | list.reverse!
18 |
19 | html = "
\n"
20 | html << list.join("\n")
21 | html << "\n
\n"
22 | html
23 | end
24 | end # module Breadcrumbs
25 |
26 | Webby::Helpers.register(BreadcrumbsHelper)
27 |
28 | # EOF
29 |
--------------------------------------------------------------------------------
/lib/webby/page.rb:
--------------------------------------------------------------------------------
# Monkey patch Webby to produce nicer URLs: every html page is rendered
# into its own directory as /index.html, so the public URL looks like /.
# Credits to Marc-André Cournoyer.
module Webby::Resources
  class Page < Resource
    # Filesystem path the rendered page is written to. "Pretty" pages are
    # moved into a directory named after the page, as index.html.
    def destination
      path = super
      return path unless prettify?
      File.join(File.dirname(path), File.basename(path, ".*"), "index.html")
    end

    # Public URL of the page; the trailing "index.html" is stripped for
    # prettified pages so the URL reads as a bare directory path.
    def url
      prettify? ? super.sub(/index\.html$/, "") : super
    end

    private

    # Only html pages other than the top-level index are prettified.
    def prettify?
      filename != "index" && extension == "html"
    end
  end
end
32 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | The MongoDB Cookbook is inspired by The Redis Cookbook, Copyright (c)
2 | 2010 Ted Nyman [http://ted.io], Tim Lossen [http://tim.lossen.de]
3 |
4 | This work is licensed under the Creative Commons Attribution
5 | Share Alike Unported License (Version 3.0):
6 |
7 |   http://creativecommons.org/licenses/by-sa/3.0/legalcode
8 |
9 | Summary:
10 |
11 | You are free to share (to copy, distribute and transmit) and
12 | to remix (to adapt) this work -- under the following
13 | conditions:
14 |
15 | (a) You must attribute the work in the manner specified by the
16 | author or licensor (but not in any way that suggests that they
17 | endorse you or your use of the work).
18 |
19 | (b) If you alter, transform, or build upon this work, you may
20 | distribute the resulting work only under the same, similar or
21 | a compatible license.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # The MongoDB Cookbook
2 |
3 | ### The ways and hows of MongoDB.
4 |
5 | Contribute your patterns, methods, and ideas to the MongoDB Cookbook.
6 |
7 | - - -
8 |
9 | ### How to Contribute a Recipe
10 |
11 | 1. Fork this repo and create a new topic branch.
12 | 2. Make your addition. See one of the recipe files for a sample.
13 | 3. Send a pull request -- please include a short description of your new or updated recipe.
14 | 4. Enjoy your awesomeness.
15 |
16 | ### Even More Ways To Contribute
17 |
18 | You can:
19 |
20 | * Add example code for any existing recipe, in any programming language.
21 | Fork the repo and add your code into the relevant directory!
22 | * Work on the cookbook.mongodb.org website. All the site lives in `content`.
23 | * Look for typos, formatting errors, missing links, and other little things.
24 | No potential improvement is 'too small' -- fork for anything.
25 |
26 | ### License
27 |
28 | Creative Commons Attribution Share Alike 3.0
29 | Inspired by The Redis Cookbook (http://rediscookbook.org)
30 |
31 | ### Gems to Install
32 | * gem install webby
33 | * gem install ultraviolet
34 | * gem install maruku
35 | * gem install RedCloth
36 | * gem install rdiscount
37 |
--------------------------------------------------------------------------------
/content/index.txt:
--------------------------------------------------------------------------------
1 | ---
2 | title: The MongoDB Cookbook
3 | created_at: 2008-08-02 14:06:40.000000 -06:00
4 | dirty: true
5 | filter:
6 | - erb
7 | ---
8 |
A Cookbook for MongoDB
9 |
Welcome to the MongoDB cookbook. Here, we hope to provide guidance on all the common ways of using MongoDB. Got any wisdom to share? You can submit a recipe via GitHub by following the instructions in the README.
10 |
11 |
12 |
13 | If you're new to MongoDB, be sure to check out:
14 |
Can't find the answer you're looking for? Send a question to the MongoDB User List or check out #mongodb on irc.freenode.net.
35 |
--------------------------------------------------------------------------------
/templates/page.erb:
--------------------------------------------------------------------------------
1 | ---
2 | title: <%= title %>
3 | created_at: <%= Time.now.to_y %>
4 | filter:
5 | - erb
6 | - textile
7 | ---
8 | p(title). <%%= h(@page.title) %>
9 |
10 | Lorem ipsum dolor sit amet, consectetuer adipiscing elit. Nunc congue ipsum vestibulum libero. Aenean vitae justo. Nam eget tellus. Etiam convallis, est eu lobortis mattis, lectus tellus tempus felis, a ultricies erat ipsum at metus.
11 |
12 | h2. Litora Sociis
13 |
14 | Lorem ipsum dolor sit amet, consectetuer adipiscing elit. Morbi et risus. Aliquam nisl. Nulla facilisi. Cras accumsan vestibulum ante. Vestibulum sed tortor. Praesent tempus fringilla elit. Ut elit diam, sagittis in, nonummy in, gravida non, nunc. Ut orci. Class aptent taciti sociosqu ad litora torquent per conubia nostra, per inceptos hymenaeos. Nam egestas, orci eu imperdiet malesuada, nisl purus fringilla odio, quis commodo est orci vitae justo. Aliquam placerat odio tincidunt nulla. Cras in libero. Aenean rutrum, magna non tristique posuere, erat odio eleifend nisl, non convallis est tortor blandit ligula. Nulla id augue.
15 |
16 | bq. Nullam mattis, odio ut tempus facilisis, metus nisl facilisis metus, auctor consectetuer felis ligula nec mauris. Vestibulum odio erat, fermentum at, commodo vitae, ultrices et, urna. Mauris vulputate, mi pulvinar sagittis condimentum, sem nulla aliquam velit, sed imperdiet mi purus eu magna. Nulla varius metus ut eros. Aenean aliquet magna eget orci. Class aptent taciti sociosqu ad litora.
17 |
18 | Vivamus euismod. Cum sociis natoque penatibus et magnis dis parturient montes, nascetur ridiculus mus. Suspendisse vel nibh ut turpis dictum sagittis. Aliquam vel velit a elit auctor sollicitudin. Nam vel dui vel neque lacinia pretium. Quisque nunc erat, venenatis id, volutpat ut, scelerisque sed, diam. Mauris ante. Pellentesque habitant morbi tristique senectus et netus et malesuada fames ac turpis egestas. Donec mattis. Morbi dignissim sollicitudin libero. Nulla lorem.
19 |
--------------------------------------------------------------------------------
/content/css/style.css:
--------------------------------------------------------------------------------
1 | body {
2 | font-family: "Helvetica Neue", Helvetica, Arial, sans-serif;
3 | font-size: 14px;
4 | margin: 0;
5 | color: #303030;
6 | }
7 |
8 | a {
9 | text-decoration: none;
10 | color: #3471CD;
11 | }
12 |
13 | a:hover {
14 | text-decoration: underline;
15 | color: #3471CD;
16 | }
17 |
18 | hr {
19 | width: 100%;
20 | border-top: 1px dotted #999;
21 | }
22 |
23 | p {
24 | font-size: 14px;
25 | }
26 |
27 | h1 {
28 | font-size: 30px;
29 | }
30 |
31 | h2 {
32 | font-size: 25px;
33 | }
34 |
35 | h3 {
36 | font-size:18px;
37 | }
38 |
39 | h4 {
40 | font-size:14px;
41 | }
42 |
43 | body code {
44 | color: #444;
45 | font-size: 120%;
46 | }
47 |
48 | body pre {
49 | padding: 1em;
50 | border: 1px solid #dfe2e5;
51 | overflow-x: auto;
52 | }
53 |
54 | .header {
55 | padding-top: 20px;
56 | padding-left: 100px;
57 | top: 0px;
58 | height: 120px;
59 | background-color: #3F2916;
60 | border-bottom:2px solid #999;
61 | }
62 |
63 | .wrapper {
64 | width: 900px;
65 | margin-top:40px;
66 | margin-bottom:100px;
67 | }
68 |
69 | .main {
70 | float:left;
71 | padding-left: 100px;
72 | margin-bottom: 40px;
73 | width: 750px;
74 | }
75 |
76 | .credit {
77 | font-style: italic;
78 | }
79 |
80 | .index li {
81 | list-style-image: url("/img/icon1.png");
82 | list-style-type: square;
83 | }
84 |
85 | .footer {
86 | font-size: 13px;
87 | clear: both;
88 | padding: 25px 0 30px 100px;
89 | border-top: 2px solid #999;
90 | height: 40px;
91 | }
92 |
93 | .large {
94 | font-size: 15px;
95 | line-height: 20px;
96 | }
97 |
98 | ul.breadcrumbs {
99 | list-style-type: none;
100 | padding: 0;
101 | margin: 0;
102 | }
103 |
104 | ul.breadcrumbs li {
105 | float:left;
106 | line-height:2em;
107 | padding-right:.75em;
108 | }
109 |
110 | ul.breadcrumbs li a {
111 | display: block;
112 | }
113 |
114 | .clearfix:after {
115 | content: ".";
116 | display: block;
117 | height: 0;
118 | clear: both;
119 | visibility: hidden;
120 | }
121 |
--------------------------------------------------------------------------------
/content/patterns/finding_max_and_min_values_for_a_key.txt:
--------------------------------------------------------------------------------
1 | ---
2 | title: Finding Max And Min Values for a given Key
3 | created_at: 2010-05-24 20:16:24.036546 -05:00
4 | recipe: true
5 | author: Eliot Horowitz
6 | description: How to use MapReduce to find the min and max values for a given key
7 | filter:
8 | - erb
9 | - markdown
10 | ---
11 |
12 | ### Problem
13 |
14 | You want to find the min and max value for a given field per key.
15 |
16 | <% code 'javascript' do %>
17 | {
18 | "_id" : "post 1",
19 | "author" : "Bob",
20 | "content" : "...",
21 | "page_views" : 5
22 | }
23 | {
24 | "_id" : "post 2",
25 | "author" : "Bob",
26 | "content" : "...",
27 | "page_views" : 9
28 | }
29 | {
30 | "_id" : "post 3",
31 | "author" : "Bob",
32 | "content" : "...",
33 | "page_views" : 8
34 | }
35 | <% end %>
36 |
37 | We want to end up with a collection of authors and their least and most popular posts:
38 |
39 | <% code 'javascript' do %>
40 | { _id : "Bob",
41 | value : { min : { page_views : 5 , _id : "post 1" } ,
42 | max : { page_views : 9 , _id : "post 3" } } }
43 | <% end %>
44 |
45 | ### Solution
46 |
47 | Use the `mapreduce` database command. Emit each author with the post's page_views and _id in the map function,
48 | then use the reduce function to keep the min and max page_views per author.
49 |
50 | #### 1. Map
51 |
52 | <% code 'javascript' do %>
53 | map = function () {
54 | var x = { page_views : this.page_views , _id : this._id };
55 | emit(this.author, { min : x , max : x } )
56 | }
57 | <% end %>
58 |
59 | #### 2. Reduce
60 | <% code 'javascript' do %>
61 | reduce = function (key, values) {
62 | var res = values[0];
63 | for ( var i=1; i<values.length; i++ ) {
64 |     if ( values[i].min.page_views < res.min.page_views )
65 |         res.min = values[i].min;
66 |     if ( values[i].max.page_views > res.max.page_views )
67 |         res.max = values[i].max;
68 | }
69 | return res;
70 | }
71 | <% end %>
72 |
73 | #### 3. Call the `mapreduce` command
74 | <% code 'javascript' do%>
75 | db.posts.mapReduce( map , reduce , { out : { inline : true } } )
76 | <% end %>
77 | ### See Also
78 |
79 | * The MongoDB [docs on mapreduce][1]
80 |
81 | [1]: http://www.mongodb.org/display/DOCS/MapReduce
82 |
--------------------------------------------------------------------------------
/content/patterns/pivot.txt:
--------------------------------------------------------------------------------
1 | ---
2 | title: Pivot Data with Map reduce
3 | created_at: 2011-05-05 18:00:24.036546 -04:00
4 | recipe: true
5 | author: Gaetan Voyer-Perrault
6 | description: How to use map-reduce to pivot table data.
7 | filter:
8 | - erb
9 | - markdown
10 | ---
11 |
12 | ### Problem
13 |
14 | You have a collection of Actors with an array of the Movies they've done.
15 |
16 | You want to generate a collection of Movies with an array of Actors in each.
17 |
18 | Some sample data
19 |
20 | <% code 'javascript' do %>
21 | db.actors.insert( { actor: "Richard Gere", movies: ['Pretty Woman', 'Runaway Bride', 'Chicago'] });
22 | db.actors.insert( { actor: "Julia Roberts", movies: ['Pretty Woman', 'Runaway Bride', 'Erin Brockovich'] });
23 | <% end %>
24 |
25 | ### Solution
26 |
27 | We need to loop through each movie in the Actor document and emit each Movie individually.
28 |
29 | The catch here is in the reduce phase. We cannot emit an array from the reduce phase, so we must build an Actors array inside of the "value" document that is returned.
30 |
31 | #### The code
32 |
33 | <% code 'javascript' do %>
34 | map = function() {
35 | for(var i in this.movies){
36 | key = { movie: this.movies[i] };
37 | value = { actors: [ this.actor ] };
38 | emit(key, value);
39 | }
40 | }
41 |
42 | reduce = function(key, values) {
43 | actor_list = { actors: [] };
44 | for(var i in values) {
45 | actor_list.actors = values[i].actors.concat(actor_list.actors);
46 | }
47 | return actor_list;
48 | }
49 | <% end %>
50 |
51 | Notice how actor_list is actually a javascript object that contains an array. Also notice that map emits the same structure.
52 |
53 | Run the following to execute the map / reduce, output it to the "pivot" collection and print the result:
54 |
55 | <% code 'javascript' do %>
56 | printjson(db.actors.mapReduce(map, reduce, "pivot"));
57 | db.pivot.find().forEach(printjson);
58 | <% end %>
59 |
60 | Here is the sample output, note that "Pretty Woman" and "Runaway Bride" have both "Richard Gere" and "Julia Roberts".
61 |
62 | { "_id" : { "movie" : "Chicago" }, "value" : { "actors" : [ "Richard Gere" ] } }
63 | { "_id" : { "movie" : "Erin Brockovich" }, "value" : { "actors" : [ "Julia Roberts" ] } }
64 | { "_id" : { "movie" : "Pretty Woman" }, "value" : { "actors" : [ "Richard Gere", "Julia Roberts" ] } }
65 | { "_id" : { "movie" : "Runaway Bride" }, "value" : { "actors" : [ "Richard Gere", "Julia Roberts" ] } }
66 |
--------------------------------------------------------------------------------
/content/patterns/date_range.txt:
--------------------------------------------------------------------------------
1 | ---
2 | title: Querying for a Date Range (Specific Month or Day)
3 | created_at: 2010-04-20 15:03:24.036546 -04:00
4 | recipe: true
5 | author: Mike Dirolf
6 | description: How to query for documents from a certain month or day.
7 | filter:
8 | - erb
9 | - markdown
10 | ---
11 |
12 | ### Problem
13 |
14 | You want to list all of the documents in a collection (in the example
15 | we'll use "posts") that were created in a particular month. Each
16 | document in the collection has a field representing the date it was
17 | created:
18 |
19 | <% code 'javascript' do %>
20 | {
21 | "title" : "A blog post",
22 | "author" : "Mike",
23 | "content" : "...",
24 | "created_on" : new Date();
25 | }
26 | <% end %>
27 |
28 | We want to perform a query to get all documents whose value for
29 | `created_on` is in the month of April, 2010.
30 |
31 | ### Solution
32 |
33 | Use a range query to query for documents whose value for `created_on`
34 | is greater than a Date representing the start of the month, and less
35 | than a Date representing the end.
36 |
37 | #### 1. Construct Date objects representing the start and end of the month
38 |
39 | Our first step is to construct Date instances that we can use to do
40 | the range query. In JavaScript:
41 |
42 | <% code 'javascript' do %>
43 | var start = new Date(2010, 3, 1);
44 | var end = new Date(2010, 4, 1);
45 | <% end %>
46 |
47 | Note that in JS the month portion of the Date constructor is
48 | 0-indexed, so the `start` variable above is April 1st and the `end`
49 | variable is May 1st. The logic here is similar in all languages, in Python we'd do:
50 |
51 | <% code 'python' do %>
52 | >>> from datetime import datetime
53 | >>> start = datetime(2010, 4, 1)
54 | >>> end = datetime(2010, 5, 1)
55 | <% end %>
56 |
57 | #### 2. Perform a range query
58 |
59 | Now that we have our reference dates, we can perform a range query to
60 | get the matching documents, note the use of the special `$` operators,
61 | `$gte` (greater-than) and `$lt` (less-than):
62 |
63 | <% code 'javascript' do %>
64 | db.posts.find({created_on: {$gte: start, $lt: end}});
65 | <% end %>
66 |
67 | Again, this translates nicely to other languages - in Python it's:
68 |
69 | <% code 'python' do %>
70 | >>> db.posts.find({"created_on": {"$gte": start, "$lt": end}})
71 | <% end %>
72 |
73 | #### 3. Use an index for performance
74 |
75 | To make these queries fast we can use an index on the `created_on` field:
76 |
77 | <% code 'javascript' do %>
78 | db.posts.ensureIndex({created_on: 1});
79 | <% end %>
80 |
81 | We can also use a compound index if we're performing a query on author
82 | and a date range, like so:
83 |
84 | <% code 'javascript' do %>
85 | db.posts.ensureIndex({author: 1, created_on: 1});
86 | db.posts.find({author: "Mike", created_on: {$gt: start, $lt: end}});
87 | <% end %>
88 |
89 |
--------------------------------------------------------------------------------
/content/patterns/track_max_value_in_array.txt:
--------------------------------------------------------------------------------
1 | ---
2 | title: Track maximum value in array
3 | created_at: 2011-12-14 13:30:00.000000 -04:00
4 | recipe: true
5 | author: Dan Crosta
6 | description: How to keep a "max_value" attribute up to date when pushing values to an array
7 | filter:
8 | - erb
9 | - markdown
10 | ---
11 |
12 | ## Problem
13 |
14 | Your document contains an array of numbers and you want to add an
15 | attribute to the document which contains the maximum value in the array.
16 | You want to ensure that the document is updated safely and atomically so
17 | that this value always represents the maximum value after any number of
18 | additions to the array.
19 |
20 | ### Assumptions
21 |
22 | * You are updating the document by its `_id` or another unique field.
23 | * You know the document already exists (i.e. you are not "upserting.")
24 |
25 | ## Solution
26 |
27 | MongoDB's atomic updates do not allow you to perform in-document
28 | comparisons when updating--that is, there is no operator which will
29 | update a value *if and only if* it is greater than the existing
30 | value. Such an operator would render this recipe trivial.
31 |
32 | However, you can accomplish this task with two invocations of the
33 | `findAndModify` command:
34 |
35 | 1. Issue a `findAndModify` that sets the `max_value` and pushes to the
36 | array at the same time. This operation only succeeds if the
37 | `max_value` is less than or equal to the new value.
38 |
39 | 2. If the previous operation fails, it can only be because `max_value`
40 | is already greater than the new value, so it is safe to push the new
41 | value without regard for `max_value`.
42 |
43 |
44 | To obtain the result of the `findAndModify` command, take the first
45 | result that succeeds and assign it to the `result` variable. Because the
46 | second `findAndModify` only runs if the preceding operations made no
47 | updates, then we know that there can only ever be a single value of
48 | `result`.
49 |
50 | The code for this operation resembles:
51 |
52 | <% code 'javascript' do %>
53 | var result1 = null, result2 = null;
54 |
55 | result1 = db.collection.findAndModify({
56 | query: {_id: ObjectId(...), max_value: {$lte: new_value}},
57 | update: {$push: {array: new_value}, $set: {max_value: new_value}}});
58 |
59 | if (result1 === null ) {
60 |     result2 = db.collection.findAndModify({
61 | query: {_id: ObjectId(...)},
62 | update: {$push: {array: new_value}}});
63 | }
64 |
65 | var result = result1 || result2;
66 | <% end %>
67 |
68 | ## Variations
69 |
70 | If you want the `result` variable to include the changes made by
71 | whichever of the two `findAndModify`s succeeded, add `new: true` to the
72 | arguments to `findAndModify`.
73 |
74 | If you want the `array` attribute of the document to contain a set of
75 | unique values, rather than an array of all values pushed, use the
76 | `$addToSet` operator rather than `$push`.
77 |
--------------------------------------------------------------------------------
/content/patterns/count_tags.txt:
--------------------------------------------------------------------------------
1 | ---
2 | title: Counting Tags
3 | created_at: 2010-04-19 10:05:24.036546 -04:00
4 | recipe: true
5 | author: Kristina Chodorow
6 | description: How to use MapReduce to find the tags for a given collection.
7 | filter:
8 | - erb
9 | - markdown
10 | ---
11 |
12 | ### Problem
13 |
14 | You want to create a tag cloud or see what the most popular tags are in a given
15 | collection, say, "posts". Each document in the collection has an array of tags,
16 | such as:
17 |
18 | <% code 'javascript' do %>
19 | {
20 | "title" : "A blog post",
21 | "author" : "Kristina",
22 | "content" : "...",
23 | "tags" : ["MongoDB", "Map/Reduce", "Recipe"]
24 | }
25 | <% end %>
26 |
27 | We want to end up with a "tags" collection that has documents that look like
28 | this:
29 |
30 | <% code 'javascript' do %>
31 | {"_id" : "MongoDB", "value" : 4}
32 | {"_id" : "Map/Reduce", "value" : 2}
33 | {"_id" : "Recipe", "value" : 7}
34 | {"_id" : "Group", "value" : 1}
35 | <% end %>
36 |
37 | ### Solution
38 |
39 | Use the `mapreduce` database command. Emit each tag in the map function, then
40 | count them in the reduce function.
41 |
42 | #### 1. Map
43 |
44 | The map function first checks if there is a tags field, as running a for-loop on
45 | undef would cause an error. Once that has been established, we go through each
46 | element, emitting the tag name and a count of 1:
47 |
48 | <% code 'javascript' do %>
49 | map = function() {
50 | if (!this.tags) {
51 | return;
52 | }
53 |
54 | for (index in this.tags) {
55 | emit(this.tags[index], 1);
56 | }
57 | }
58 | <% end %>
59 |
60 | #### 2. Reduce
61 |
62 | For the reduce function, we initialize a counter to 0 and then add each element
63 | of the `current` array to it. Then we return the final count.
64 |
65 | <% code 'javascript' do %>
66 | reduce = function(previous, current) {
67 | var count = 0;
68 |
69 | for (index in current) {
70 | count += current[index];
71 | }
72 |
73 | return count;
74 | }
75 | <% end %>
76 |
77 | #### 3. Call the `mapreduce` command
78 |
79 | We want to put the results in the "tags" collection, so we'll specify that with
80 | the `out` parameter:
81 |
82 | <% code 'javascript' do %>
83 | > result = db.runCommand({
84 | ... "mapreduce" : "posts",
85 | ... "map" : map,
86 | ... "reduce" : reduce,
87 | ... "out" : "tags"})
88 | <% end %>
89 |
90 | Now, if we query the tags collection, we find:
91 |
92 | <% code 'javascript' do %>
93 | > db.tags.find()
94 | {"_id" : "MongoDB", "value" : 4}
95 | {"_id" : "Map/Reduce", "value" : 2}
96 | {"_id" : "Recipe", "value" : 7}
97 | {"_id" : "Group", "value" : 1}
98 | <% end %>
99 |
100 | ### See Also
101 |
102 | * The MongoDB [docs on aggregation][1]
103 | * [Map-Reduce Basics][2] by Kyle Banker
104 | * [MapReduce: the Fanfiction][3] by Kristina Chodorow
105 |
106 | [1]: http://www.mongodb.org/display/DOCS/Aggregation
107 | [2]: http://kylebanker.com/blog/2009/12/mongodb-map-reduce-basics/
108 | [3]: http://www.snailinaturtleneck.com/blog/2010/03/15/mapreduce-the-fanfiction/
109 |
110 |
--------------------------------------------------------------------------------
/content/css/code.css:
--------------------------------------------------------------------------------
1 | pre.twilight .DiffInserted {
2 | background-color: #253B22;
3 | color: #F8F8F8;
4 | }
5 | pre.twilight .DiffHeader {
6 | background-color: #0E2231;
7 | color: #F8F8F8;
8 | font-style: italic;
9 | }
10 | pre.twilight .CssPropertyValue {
11 | color: #F9EE98;
12 | }
13 | pre.twilight .CCCPreprocessorDirective {
14 | color: #AFC4DB;
15 | }
16 | pre.twilight .Constant {
17 | color: #CF6A4C;
18 | }
19 | pre.twilight .DiffChanged {
20 | background-color: #4A410D;
21 | color: #F8F8F8;
22 | }
23 | pre.twilight .EmbeddedSource {
24 | background-color: #A3A6AD;
25 | }
26 | pre.twilight .Support {
27 | color: #9B859D;
28 | }
29 | pre.twilight .MarkupList {
30 | color: #F9EE98;
31 | }
32 | pre.twilight .CssConstructorArgument {
33 | color: #8F9D6A;
34 | }
35 | pre.twilight .Storage {
36 | color: #F9EE98;
37 | }
38 | pre.twilight .line-numbers {
39 | background-color: #5C5B51;
40 | color: #D1D0B8;
41 | }
42 | pre.twilight .CssClass {
43 | color: #9B703F;
44 | }
45 | pre.twilight .StringConstant {
46 | color: #DDF2A4;
47 | }
48 | pre.twilight .CssAtRule {
49 | color: #8693A5;
50 | }
51 | pre.twilight .MetaTagInline {
52 | color: #E0C589;
53 | }
54 | pre.twilight .MarkupHeading {
55 | color: #CF6A4C;
56 | }
57 | pre.twilight .CssTagName {
58 | color: #CDA869;
59 | }
60 | pre.twilight .SupportConstant {
61 | color: #CF6A4C;
62 | }
63 | pre.twilight .DiffDeleted {
64 | background-color: #420E09;
65 | color: #F8F8F8;
66 | }
67 | pre.twilight .CCCPreprocessorLine {
68 | color: #8996A8;
69 | }
70 | pre.twilight .StringRegexpSpecial {
71 | color: #CF7D34;
72 | }
73 | pre.twilight .EmbeddedSourceBright {
74 | background-color: #9C9EA4;
75 | }
76 | pre.twilight .InvalidIllegal {
77 | background-color: #241A24;
78 | color: #F8F8F8;
79 | }
80 | pre.twilight .SupportFunction {
81 | color: #DAD085;
82 | }
83 | pre.twilight .CssAdditionalConstants {
84 | color: #CA7840;
85 | }
86 | pre.twilight .MetaTagAll {
87 | color: #AC885B;
88 | }
89 | pre.twilight .StringRegexp {
90 | color: #E9C062;
91 | }
92 | pre.twilight .StringEmbeddedSource {
93 | color: #DAEFA3;
94 | }
95 | pre.twilight .EntityInheritedClass {
96 | color: #9B5C2E;
97 | font-style: italic;
98 | }
99 | pre.twilight .CssId {
100 | color: #8B98AB;
101 | }
102 | pre.twilight .CssPseudoClass {
103 | color: #8F9D6A;
104 | }
105 | pre.twilight .StringVariable {
106 | color: #8A9A95;
107 | }
108 | pre.twilight .String {
109 | color: #8F9D6A;
110 | }
111 | pre.twilight .Keyword {
112 | color: #CDA869;
113 | }
114 | pre.twilight {
115 | background-color: #141414;
116 | color: #F8F8F8;
117 | }
118 | pre.twilight .CssPropertyName {
119 | color: #C5AF75;
120 | }
121 | pre.twilight .DoctypeXmlProcessing {
122 | color: #494949;
123 | }
124 | pre.twilight .InvalidDeprecated {
125 | color: #D2A8A1;
126 | font-style: italic;
127 | }
128 | pre.twilight .Variable {
129 | color: #7587A6;
130 | }
131 | pre.twilight .Entity {
132 | color: #9B703F;
133 | }
134 | pre.twilight .Comment {
135 | color: #5F5A60;
136 | font-style: italic;
137 | }
138 |
139 |
--------------------------------------------------------------------------------
/content/patterns/votes.txt:
--------------------------------------------------------------------------------
1 | ---
2 | title: Voting with Atomic Operators
3 | created_at: 2010-04-19 10:05:24.036546 -04:00
4 | recipe: true
5 | author: Kyle Banker
6 | description: How to use MongoDB atomic operators to implement efficient voting.
7 | filter:
8 | - erb
9 | - markdown
10 | ---
11 |
12 | ### Problem
13 |
14 | You want to give your users the ability to vote on things. Whether it's articles,
15 | comments, photos, or tweets, it seems like everything needs voteability.
16 |
17 | * Make sure that each user gets just one vote.
18 | * Keep a counter cache on the number of votes.
19 |
20 | ### Solution
21 |
22 | The solution is provided in JavaScript; translating to the language of your choice
23 | should be pretty straightforward.
24 |
25 | #### 1. Store the vote information in the object itself.
26 |
27 | Let's say you're building a social news site like Digg. You want your users to be able
28 | to vote on submitted stories. Here's a sample story document with all the information
29 | required for voting:
30 |
31 | <% code 'javascript' do %>
32 | {'_id': ObjectId("4bcc9e697e020f2d44471d27"),
33 | title: 'Aliens discovered on Mars!',
34 | description: 'Martian',
35 | vote_count: 0,
36 | voters: []
37 | }
38 | <% end %>
39 |
40 | Notice that we've reserved two fields for voting: the first is an integer caching the number of votes,
41 | and the second is a list of voters.
42 |
43 | #### 2. Use an atomic update operation for adding and removing votes.
44 |
45 | Here you get to see what's great about atomic operators. You can reliably add the vote, without
46 | risking a duplicate, in a single operation. Here's the code to update the story above:
47 |
48 | <% code 'javascript' do %>
49 | // Get the user id who's voting
50 | user_id = ObjectId("4bcc9e697e020f2d44471a15");
51 |
52 | // This query succeeds only if the voters array doesn't contain the user
53 | query = {_id: ObjectId("4bcc9e697e020f2d44471d27"), voters: {'$ne': user_id}};
54 |
55 | // Update to add the user to the array and increment the number of votes.
56 | update = {'$push': {'voters': user_id}, '$inc': {vote_count: 1}}
57 |
58 | db.stories.update(query, update);
59 | <% end %>
60 |
61 | #### 3. If you want to allow users to retract their votes, the code is quite similar:
62 |
63 | The only difference is that we use the **$pull** operator, and we decrement by passing
64 | -1 to **$inc**.
65 |
66 | <% code 'javascript' do %>
67 | // This query succeeds when the voter has already voted on the story.
68 | query = {_id: ObjectId("4bcc9e697e020f2d44471d27"), voters: user_id};
69 |
70 | // Update to remove the user from the array and decrement the number of votes.
71 | update = {'$pull': {'voters': user_id}, '$inc': {vote_count: -1}}
72 |
73 | db.stories.update(query, update);
74 | <% end %>
75 |
76 | ### Discussion
77 |
78 | One thing to note is that because the operation of step 2 uses the **$ne** operator, that part of the query
79 | can't use an index. This may become a problem if you expect many hundreds of votes per story; any fewer
80 | shouldn't be a concern.
81 |
82 | By contrast, the query in step 3 _can_ use a compound index efficiently:
83 |
84 | <% code 'javascript' do %>
85 | db.stories.ensureIndex({'_id': 1, voters: 1});
86 | <% end %>
87 |
88 | However, you'd create this index only if you expect people to be changing their votes often (which usually
89 | isn't the case).
90 |
--------------------------------------------------------------------------------
/layouts/default.txt:
--------------------------------------------------------------------------------
1 | ---
2 | extension: html
3 | filter: erb
4 | description: A cookbook full of mouth-watering MongoDB recipes.
5 | ---
6 |
7 |
8 | <% if @page.recipe %>
9 | <%= @page.title %> | The MongoDB Cookbook
10 | <% else %>
11 | The MongoDB Cookbook
12 | <% end %>
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
38 | <% end %>
39 | <%= @content %>
40 |
41 | <% if @page.recipe %>
42 |
43 |
44 |
45 |
55 |
56 | blog comments powered by Disqus
57 | <% end %>
58 |
59 |
60 |
63 |
64 |
78 |
79 |
83 |
88 |
89 |
90 |
--------------------------------------------------------------------------------
/content/patterns/unique_items_map_reduce.txt:
--------------------------------------------------------------------------------
1 | ---
2 | title: Counting Unique Items with Map-Reduce
3 | created_at: 2010-05-05 10:05:24.036546 -04:00
4 | recipe: true
5 | author: Kyle Banker
6 | description: How to use map-reduce to count unique items across a category.
7 | filter:
8 | - erb
9 | - markdown
10 | ---
11 |
12 | ### Problem
13 |
14 | You have a collection that stores pageviews by user, and you want to count the total
15 | number of unique user visits per day using map-reduce.
16 |
17 | Each pageview document looks something like this:
18 |
19 | <% code 'javascript' do %>
20 | {
21 | "url" : "http://example.com/photos",
22 | "user_id" : ObjectID('4be1c916e031933119d78b30'),
23 | "date": "Wed May 05 2010 15:37:58 GMT-0400 (EDT)"
24 | }
25 | <% end %>
26 |
27 | The solution requires grouping the pageviews by day and then counting
28 | the total number of user visits and along with the number of unique visits for that day.
29 |
30 | ### Solution
31 |
32 | What's tricky about this situation is that it requires a two-pass map-reduce in order to scale well.
33 | The first pass involves grouping by date and user id. This allows us to group by user and day and
34 | returns, as a side effect, the number of pageviews per user per day.
35 |
36 | #### 1. First Pass
37 |
38 | ##### Map Step
39 |
40 | The only tricky part about the map function is making sure that we emit on the day. Since we're storing
41 | a full date, we need to parse out just the year, month, and date, and then emit on that value:
42 |
43 | <% code 'javascript' do %>
44 | map = function() {
45 | day = Date.UTC(this.date.getFullYear(), this.date.getMonth(), this.date.getDate());
46 |
47 | emit({day: day, user_id: this.user_id}, {count: 1});
48 | }
49 | <% end %>
50 |
51 | If you want a more efficient date calculation, you can use this:
52 |
53 | <% code 'javascript' do %>
54 | day = this.date - (this.date % (24 * 60 * 60 * 1000));
55 | <% end %>
56 |
57 | ##### Reduce Step
58 |
59 | The reduce function is trivial, as it simply performs a count:
60 |
61 |
62 | <% code 'javascript' do %>
63 | reduce = "function(key, values) {
64 | var count = 0;
65 |
66 | values.forEach(function(v) {
67 | count += v['count'];
68 | });
69 |
70 | return {count: count};
71 | }"
72 | <% end %>
73 |
74 | ##### Run the command
75 |
76 | We run the mapReduce command, storing the output in the *pageview_results* collection:
77 |
78 | <% code 'javascript' do %>
79 | db.pageviews.mapReduce(map, reduce, {out: "pageview_results"});
80 | <% end %>
81 |
82 | #### 2. Second Pass
83 |
84 | ##### Map Step
85 |
86 | Now that we have a preliminary set of results, we can do a second pass to count unique users by day.
87 | Here's the map function:
88 |
89 | <% code 'javascript' do %>
90 | map = "function() {
91 | emit(this['_id']['day'], {count: 1});
92 | }"
93 | <% end %>
94 |
95 | Because the first result set will store the *emit* key within an '_id' field, we have to reach into
96 | that object to get the date.
97 |
98 | ##### Reduce Step
99 |
100 | It turns out that the same reduce function will work for the second pass; no need to rewrite.
101 |
102 | ##### Running the command
103 |
104 | Now just run the mapReduce command on the result collection, and output to a new results collection.
105 |
106 | <% code 'javascript' do %>
107 | db.pageview_results.mapReduce(map, reduce, {out: "pageview_results_unique"});
108 | <% end %>
109 |
110 | Since we've specified that the output collection should be called *pageview_results_unique*, we can
111 | query that collection to see the results:
112 |
113 | <% code 'javascript' do %>
114 | db.pageview_results_unique.find();
115 | <% end %>
116 |
117 | That's all there is to it!
118 |
119 | #### 3. Limiting the Operation
120 |
121 | If our pageviews collection spans a long period of time, it might be prudent to run map-reduce over
122 | just a portion of the data. That can be achieved by passing a query selector to the map-reduce command. So,
123 | for instance, if we just wanted results from the past two weeks, we could run:
124 |
125 | <% code 'javascript' do %>
126 | two_weeks_ago = new Date(Date.now() - 60 * 60 * 24 * 14 * 1000);
127 | db.pageviews.mapReduce(map, reduce,
128 | {out: "pageview_results", query: {date: {'$gt': two_weeks_ago}}});
129 | <% end %>
130 |
131 | ### See Also
132 |
133 | * The MongoDB [docs on aggregation][1]
134 | * [Map-Reduce Basics][2] by Kyle Banker
135 | * [MapReduce: the Fanfiction][3] by Kristina Chodorow
136 |
137 | [1]: http://www.mongodb.org/display/DOCS/Aggregation
138 | [2]: http://kylebanker.com/blog/2009/12/mongodb-map-reduce-basics/
139 | [3]: http://www.snailinaturtleneck.com/blog/2010/03/15/mapreduce-the-fanfiction/
140 |
141 |
--------------------------------------------------------------------------------
/content/patterns/random-attribute.txt:
--------------------------------------------------------------------------------
1 | ---
2 | title: The Random Attribute
3 | created_at: 2010-05-12 10:05:24.036546 -04:00
4 | recipe: true
5 | author: Alberto Lerner, Dwight Merriman, and Aaron Staple
6 | description:
7 | filter:
8 | - erb
9 | - markdown
10 | ---
11 |
12 | ### Problem
13 |
14 | Consider a scenario where you'd like to issue a query but would like
15 | to pick a random document in the result.
16 |
17 | <% code 'javascript' do %>
18 | photos.find({"author":"John Doe"})
19 | <% end %>
20 |
21 | Any 'John Doe' would do. But you'd like a different one each time you
22 | run the query. Sure, you can always count the resulting documents and
23 | randomly pick one. But in that case the query would be run in its
24 | entirety and all the results would be transferred to your app.
25 |
26 | Now, consider another scenario where you'd like to run a map/reduce
27 | but would be happy to trade result accuracy for performance. That is,
28 | you'd be happy to use a sample (of a given percentage) of your data.
29 | You don't really know the number of documents involved, but they are
30 | numerous.
31 |
32 |
33 | ### Solution
34 |
35 | We can add a special attribute in each document that we'll call here
36 | the 'random attribute,' RA. The RA needs to be, well, random. If you
37 | use, for instance, a number generated by Math.random() in Javascript,
38 | that would work.
39 |
40 | <% code 'javascript' do %>
41 | > db.docs.drop()
42 | > db.docs.save( { key : 1, ..., random : Math.random() } )
43 | > db.docs.save( { key : 1, ..., random : Math.random() } )
44 | > db.docs.save( { key : 2, ..., random : Math.random() } )
45 | ... many more insertions with 'key : 2' ...
46 | > db.docs.save( { key : 2, ..., random : Math.random() } )
47 | ...
48 | <% end %>
49 |
50 | If you use Math.random(), the random attribute in your documents could
51 | have any value from 0 to 1:
52 |
53 |
54 | <% code 'javascript' do %>
55 | ...
56 | { "_id" : ObjectId("4bfa81198cf5fc1002a42b91"), "key" : 2, "random" : 0.23578915913357468}
57 | ...
58 | { "_id" : ObjectId("4bfa81198cf5fc1002a42b93"), "key" : 2, "random" : 0.8983254666113549 }
59 | ...
60 | <% end %>
61 |
62 |
63 | The solution also requires the RA to be indexed in a certain way. But
64 | let's discuss this using an example.
65 |
66 |
67 |
68 | ### 1. Picking a random document from the result
69 |
70 | If you're just interested in one document, you'd formulate your
71 | original query and add a filter over the RA. The idea here is
72 | that you'd try to find which of the result documents has the closest
73 | RA to a random number you draw at query time.
74 |
75 | The code below shows one way to do it. You'd pick a random number on the
76 | fly -- using the same method you used to populate the RA -- and test
77 | your number against the stored attribute.
78 |
79 |
80 | <% code 'javascript' do %>
81 | > rand = Math.random()
82 | > result = db.docs.findOne( { key : 2, random : { $gte : rand } } )
84 | > if ( result == null ) {
85 | > result = db.docs.findOne( { key : 2, random : { $lte : rand } } )
86 | > }
87 | <% end %>
88 |
89 |
90 | Note that we're not going for equality alone because the chances of
91 | that to occur are low. So we try either '$gte' or '$lte' with equal
92 | probability but knowing that in some cases it may not return a result,
93 | even though there are documents in the result. For that reason, an
94 | empty result must be verified by doing a search in the opposite
95 | direction.
96 |
97 | The final -- but important -- detail about this query is that both the
98 | search criteria and the RA must be indexed together. Please, see the
99 | Caveats sections for further details.
100 |
101 | <% code 'javascript' do %>
102 | > db.docs.ensureIndex( { key : 1, random : 1 } )
103 | <% end %>
104 |
105 |
106 |
107 | ### 2. Map/reduce on a sample of the data
108 |
109 | If your collection is large and the computation you want to run could
110 | operate on a sample instead, ie tolerating a less accurate result, you
111 | can have the mapping phase apply an early filter based on the RA.
112 |
113 | <% code 'javascript' do %>
114 | > db.docs.drop()
115 | > for (i=0; i < 10000; i++) { db.docs.save( { key : i % 10, rand : Math.random() } ) }
116 | > m = function() { emit(this.key, 1); }
117 | > r = function(k, vals) {
118 | var sum=0;
119 | for (var i in vals) sum += vals[i];
120 | return sum;
121 | }
122 | > sample = 0.1
123 | > res = db.docs.mapReduce(m, r, { query : { key : 2, rand : { $lte: sample } } })
124 | <% end %>
125 |
126 | Mongo will issue the query over all the data but the mapper would be
127 | called only for the sampled documents, 10% of them here. In the
128 | example above, the running time should be significantly reduced as
129 | compared with the "full" query. The counter for 'key : 2' with a 10%
130 | sample was 85 when the perfect result would have been 100.
131 |
132 | You could improve accuracy by increasing the sample size, of course.
133 |
134 | Here's a sample-based count for all the keys. This should give you an
135 | idea about the speed/accuracy trade-off.
136 |
137 | <% code 'javascript' do %>
138 | > res = db.docs.mapReduce(m, r, { query : { rand : { $lte : 0.1 } } })
139 | ...
140 | > db[res.result].find()
141 | { "_id" : 0, "value" : 93 }
142 | { "_id" : 1, "value" : 82 }
143 | { "_id" : 2, "value" : 85 }
144 | { "_id" : 3, "value" : 92 }
145 | { "_id" : 4, "value" : 114 }
146 | { "_id" : 5, "value" : 104 }
147 | { "_id" : 6, "value" : 100 }
148 | { "_id" : 7, "value" : 90 }
149 | { "_id" : 8, "value" : 104 }
150 | { "_id" : 9, "value" : 103 }
151 | <% end %>
152 |
153 | In the map/reduce RA case, that attribute doesn't necessarily need to
154 | be indexed as in case 1 above.
155 |
156 |
157 | ### Caveat
158 |
159 | In the simple document case, the query we'd use must be an equality
160 | one. The map-reduce case doesn't require so.
161 |
162 |
163 | The random attribute will work better if the results we're extracting
164 | from have a large number of documents. Consider for instance a query with few results:
165 |
166 | <% code 'javascript' do %>
167 | > db.docs.save( { key : 1, random : Math.random() } )
168 | > db.docs.save( { key : 1, random : Math.random() } )
169 | > db.docs.find()
170 | { "_id" : ObjectId("4bfa9585cffdb770c08e7cc9"), "key" : 1, "random" : 0.9988383572723725 }
171 | { "_id" : ObjectId("4bfa9586cffdb770c08e7cca"), "key" : 1, "random" : 0.8338006548262672 }
172 | <% end %>
173 |
174 |
175 | The RA cannot be considered to be uniformly distributed between 0 and
176 | 1 for that key. The net effect is that some documents from the result
177 | would appear much more often than others when a random document matching the
178 | search criteria 'key : 1' is requested.
179 |
--------------------------------------------------------------------------------
/content/patterns/finding_max_and_min.txt:
--------------------------------------------------------------------------------
1 | ---
2 | title: Finding Max And Min Values with Versioned Documents
3 | created_at: 2010-05-24 20:16:24.036546 -05:00
4 | recipe: true
5 | author: Amos King
6 | description: How to use MapReduce to find max/min values for a given set and property
7 | filter:
8 | - erb
9 | - markdown
10 | ---
11 |
12 | ### Problem
13 |
14 | You want to list the latest version numbers of a set of documents. Each document contains
15 | a field that represents the version of the document and a field representing which document
16 | that is a version of:
17 |
18 | <% code 'javascript' do %>
19 | {
20 | "document_id" : "mongoDB How-To",
21 | "author" : "Amos King",
22 | "content" : "...",
23 | "version" : 1.0
24 | }
25 | <% end %>
26 |
27 | We want to end up with a collection of document_ids and their largest version number:
28 |
29 | <% code 'javascript' do %>
30 | {"_id" : "mongoDB How To", "value" : 1.1}
31 | {"_id" : "Resume", "value" : 6}
32 | {"_id" : "Schema", "value" : 1}
33 | <% end %>
34 |
35 | ### Solution
36 |
37 | Use the `mapreduce` database command. Emit each document_id and version in the map function,
38 | then use the reduce function to find the max version.
39 |
40 | #### 1. Map
41 |
42 | The map function is very simple. We use our common element between all versions as the key and the version as the value:
43 |
44 | <% code 'javascript' do %>
45 | map = function () {
46 | emit(this.document_id, this.version);
47 | }
48 | <% end %>
49 |
50 | #### 2. Reduce
51 |
52 | The reduce function is also very simple but has a little bit of javascript magic.
53 | Math.max normally takes in any number of arguments (i.e. Math.max(1,2,3)), but we
54 | need to call it with an array. So we call Math.max with apply so that we can pass
55 | in an array of values to max. The apply breaks the array into individual arguments
56 | to pass to Math.max. The first argument to apply is the context in which we want to
57 | run; Math will do fine here.
58 |
59 | <% code 'javascript' do %>
60 | reduce = function (key, values) {
61 | return Math.max.apply(Math, values);
62 | }
63 | <% end %>
64 |
65 | Finding the minimum value is as easy as replacing Math.max with Math.min.
66 |
67 | #### 3. Call the `mapreduce` command
68 |
69 | Now it's time to get our result set. We'll set the output collection name parameter to 'newest_versions' so that we'll have an appropriately named set to work with:
70 |
71 | <% code 'javascript' do %>
72 | > result = db.runCommand({
73 | ... "mapreduce" : "documents",
74 | ... "map" : map,
75 | ... "reduce" : reduce,
76 | ... "out" : "newest_versions"})
77 | <% end %>
78 |
79 | Now, we query the 'newest_versions' collection. Each document is exactly what we're looking for:
80 |
81 | <% code 'javascript' do %>
82 | > db.newest_versions.find()
83 | {"_id" : "mongoDB How To", "value" : 1.1}
84 | {"_id" : "Resume", "value" : 6}
85 | {"_id" : "Schema", "value" : 1}
86 | <% end %>
87 |
88 | ### Extras
89 |
90 | The Map and Reduce Functions can be rewritten slightly to return the Maximum and Minimum versions of each document.
91 |
92 | For the purpose of this example, the input collection is as follows: (The _id values have been truncated for brevity.)
93 |
94 | <% code 'javascript' do %>
95 | > db.documents.find()
96 | { "_id" : 1, "document_id" : "mongoDB How-To", "author" : "Amos King", "content" : "...", "version" : 1 }
97 | { "_id" : 2, "document_id" : "mongoDB How-To", "author" : "Amos King", "content" : "...", "version" : 1.1 }
98 | { "_id" : 3, "document_id" : "Resume", "author" : "Author", "content" : "...", "version" : 6 }
99 | { "_id" : 4, "document_id" : "Schema", "author" : "Someone Else", "content" : "...", "version" : 0.9 }
100 | { "_id" : 5, "document_id" : "Schema", "author" : "Someone Else", "content" : "...", "version" : 1 }
101 | >
102 | <% end %>
103 |
104 | #### Map
105 |
106 | The new Map function emits documents containing the document_id, and "value" key containing a list of embedded documents, each containing the keys, "max" and "min". Both keys are initially set to be equal to the "version" key of the current document. Because there is only one document containing the "document_id" : "Resume", this output will not need to be reduced.
107 |
108 | <% code 'javascript' do %>
109 | map = function () {
110 | emit(this.document_id, {max:this.version, min:this.version});
111 | }
112 | <% end %>
113 |
114 | The Map function will emit something that looks like the following:
115 |
116 | <% code 'javascript' do %>
117 | "mongoDB How-To", { "max" : 1, "min" : 1 }
118 | "mongoDB How-To", { "max" : 1.1, "min" : 1.1 }
119 | "Resume", { "max" : 6, "min" : 6 }
120 | "Schema", { "max" : 0.9, "min" : 0.9 }
121 | "Schema", { "max" : 1, "min" : 1 }
122 | <% end %>
123 |
124 | #### Reduce
125 |
126 | Next the Reduce function will be run to compress the data emitted by the Map function. The Reduce function requires an input of an id, and a list of values. It must output an id and a single value, which in this case is a document containing the keys, "max" and "min". The reduce function will interpret the data that has been emitted from the Map function as follows:
127 |
128 | <% code 'javascript' do %>
129 | "mongoDB How-To", [{ "max" : 1, "min" : 1 }, { "max" : 1.1, "min" : 1.1 }]
130 | "Schema", [{ "max" : 0.9, "min" : 0.9 }, { "max" : 1, "min" : 1 }]
131 | <% end %>
132 |
133 | The Reduce function will be run repeatedly, passing its previous output value as the new input, until the output list contains only one value.
134 |
135 | Notice that the id "Resume" is not passed to the Reduce function, because it only has one value associated with it. This reduce function will find the maximum "max" value, and the minimum "min" value for each key. It will be run twice; once for the id "Schema", and once for the id "mongoDB How-To".
136 |
137 | <% code 'javascript' do %>
138 | reduce = function (key, values) {
139 | max = values[0].max;
140 | min = values[0].min;
141 | if (values.length > 1){
142 | for(i in values){
143 | if(values[i].max > max){
144 | max = values[i].max;
145 | };
146 | if(values[i].min < min){
147 | min = values[i].min;
148 | };
149 | };
150 | };
151 | return {"max":max, "min":min};
152 | }
153 |
154 | <% end %>
155 |
156 | Running mapreduce will return the following:
157 |
158 | <% code 'javascript' do %>
159 | > result = db.runCommand({"mapreduce" : "documents","map" : map,"reduce" : reduce,"out" : "newest_versions"})
160 | {
161 | "result" : "newest_versions",
162 | "timeMillis" : 2,
163 | "counts" : {
164 | "input" : 5,
165 | "emit" : 5,
166 | "reduce" : 2,
167 | "output" : 3
168 | },
169 | "ok" : 1
170 | }
171 | > db.newest_versions.find()
172 | { "_id" : "Resume", "value" : { "max" : 6, "min" : 6 } }
173 | { "_id" : "Schema", "value" : { "max" : 1, "min" : 0.9 } }
174 | { "_id" : "mongoDB How-To", "value" : { "max" : 1.1, "min" : 1 } }
175 | <% end %>
176 |
177 | ### See Also
178 |
179 | * The MongoDB [docs on mapreduce][1]
180 |
181 | [1]: http://www.mongodb.org/display/DOCS/MapReduce
182 |
--------------------------------------------------------------------------------
/content/patterns/perform-two-phase-commits.txt:
--------------------------------------------------------------------------------
1 | ---
2 | title: Perform Two Phase Commits
3 | created_at: 2011-12-02
4 | recipe: true
5 | author: Antoine Girbal
6 | description: Perform Two-Phase Commits with MongoDB
7 | filter:
8 | - erb
9 | - markdown
10 | ---
11 |
12 | A common problem with non-relational database is that it is not
13 | possible to do transactions across several documents. When executing a
14 | transaction composed of several sequential operations, some issues
15 | arise:
16 |
17 | - Atomicity: it is difficult to rollback changes by previous
18 | operations if one fails.
19 |
20 | - Isolation: changes to a single document are seen by concurrent
21 | processes, which may have an inconsistent view of the data during
22 | the transaction execution.
23 |
24 | - Consistency: In case of a major failure (network, hardware) it is
25 | possible that the data will be left inconsistent and difficult to
26 | repair.
27 |
28 | MongoDB provides atomicity for an operation on a single
29 | document. Since documents can be fairly complex, this actually covers
30 | many more cases than with a traditional DB. Still there are cases
31 | where transactions across documents are needed, and that is when a
32 | two-phase commit can be used. The two-phase commit is made possible by
33 | the fact that documents are complex and can represent pending data and
34 | states. This process makes sure that the data is eventually
35 | consistent, which is usually what matters most to the system.
36 |
37 | ## Account transfer example
38 |
39 | ## Problem overview
40 |
41 | The most common example of transaction is to transfer funds from
42 | account A to B in a reliable way. With a traditional RDBMS, funds are
43 | subtracted from A and added to B within an atomic transaction. With
44 | MongoDB, a viable solution is to use a two-phase commit.
45 |
46 | Let's have one collection holding accounts:
47 |
48 | <% code 'javascript' do %>
49 | foo:PRIMARY> db.accounts.save({name: "A", balance: 1000, pendingTransactions: []})
50 | foo:PRIMARY> db.accounts.save({name: "B", balance: 1000, pendingTransactions: []})
51 | foo:PRIMARY> db.accounts.find()
52 | { "_id" : ObjectId("4d7bc66cb8a04f512696151f"), "name" : "A", "balance" : 1000, "pendingTransactions" : [ ] }
53 | { "_id" : ObjectId("4d7bc67bb8a04f5126961520"), "name" : "B", "balance" : 1000, "pendingTransactions" : [ ] }
54 | <% end %>
55 |
56 | And we need one collection representing transactions:
57 |
58 | <% code 'javascript' do %>
59 | foo:PRIMARY> db.transactions.save({source: "A", destination: "B", value: 100, state: "initial"})
60 | foo:PRIMARY> db.transactions.find()
61 | { "_id" : ObjectId("4d7bc7a8b8a04f5126961522"), "source" : "A", "destination" : "B", "value" : 100, "state" : "initial" }
62 | <% end %>
59 |
60 |
61 | ### Transaction description
62 |
63 | **Step 1:** the transaction state is switched to "pending":
64 |
65 | <% code 'javascript' do %>
66 | foo:PRIMARY> t = db.transactions.findOne({state: "initial"})
67 | {
68 | "_id" : ObjectId("4d7bc7a8b8a04f5126961522"),
69 | "source" : "A",
70 | "destination" : "B",
71 | "value" : 100,
72 | "state" : "initial"
73 | }
74 | foo:PRIMARY> db.transactions.update({_id: t._id}, {$set: {state: "pending"}})
75 | foo:PRIMARY> db.transactions.find()
76 | { "_id" : ObjectId("4d7bc7a8b8a04f5126961522"), "source" : "A", "destination" : "B", "value" : 100, "state" : "pending" }
77 | <% end %>
78 |
79 | **Step 2:** apply the transaction to both accounts, and make sure the
80 | transaction is not already pending:
81 |
82 | <% code 'javascript' do %>
83 | foo:PRIMARY> db.accounts.update({name: t.source, pendingTransactions: {$ne: t._id}}, {$inc: {balance: -t.value}, $push: {pendingTransactions: t._id}})
84 | foo:PRIMARY> db.accounts.update({name: t.destination, pendingTransactions: {$ne: t._id}}, {$inc: {balance: t.value}, $push: {pendingTransactions: t._id}})
85 | foo:PRIMARY> db.accounts.find()
86 | { "_id" : ObjectId("4d7bc97fb8a04f5126961523"), "balance" : 900, "name" : "A", "pendingTransactions" : [ ObjectId("4d7bc7a8b8a04f5126961522") ] }
87 | { "_id" : ObjectId("4d7bc984b8a04f5126961524"), "balance" : 1100, "name" : "B", "pendingTransactions" : [ ObjectId("4d7bc7a8b8a04f5126961522") ] }
88 | <% end %>
89 |
90 | **Step 3:** set the transaction's state to "committed":
91 |
92 | <% code 'javascript' do %>
93 | foo:PRIMARY> db.transactions.update({_id: t._id}, {$set: {state: "committed"}})
94 | foo:PRIMARY> db.transactions.find()
95 | { "_id" : ObjectId("4d7bc7a8b8a04f5126961522"), "destination" : "B", "source" : "A", "state" : "committed", "value" : 100 }
96 | <% end %>
97 |
98 | **Step 4:** remove the pending transaction from accounts:
99 |
100 | <% code 'javascript' do %>
101 | foo:PRIMARY> db.accounts.update({name: t.source}, {$pull: {pendingTransactions: ObjectId("4d7bc7a8b8a04f5126961522")}})
102 | foo:PRIMARY> db.accounts.update({name: t.destination}, {$pull: {pendingTransactions: ObjectId("4d7bc7a8b8a04f5126961522")}})
103 | foo:PRIMARY> db.accounts.find()
104 | { "_id" : ObjectId("4d7bc97fb8a04f5126961523"), "balance" : 900, "name" : "A", "pendingTransactions" : [ ] }
105 | { "_id" : ObjectId("4d7bc984b8a04f5126961524"), "balance" : 1100, "name" : "B", "pendingTransactions" : [ ] }
106 | <% end %>
107 |
108 | **Step 5:** set transaction's state to "done":
109 |
110 | <% code 'javascript' do %>
111 | foo:PRIMARY> db.transactions.update({_id: t._id}, {$set: {state: "done"}})
112 | foo:PRIMARY> db.transactions.find()
113 | { "_id" : ObjectId("4d7bc7a8b8a04f5126961522"), "destination" : "B", "source" : "A", "state" : "done", "value" : 100 }
114 | <% end %>
115 |
116 | ### Failure scenarios
117 |
118 | Now let's look at the failure scenarios and how to deal with them. For
119 | example, a failure can be that the application making the sequential
120 | operations suddenly dies, and is restarted.
121 |
122 | Cases to cover:
123 |
124 | - any failure after step 1 and before step 3: Application
125 | should get a list of transactions in state "pending" and resume from
126 | step 2.
127 |
128 | - any failure after step 3 and before step 5: Application should get a
129 | list of transactions in state "committed" and resume from step 4.
130 |
131 | Application is thus always able to resume the transaction and
132 | eventually get to a consistent state. These "repair" jobs should be
133 | run at application startup and possibly at regular interval to catch
134 | any unfinished transaction. The time it takes to get to a consistent
135 | state may vary depending on how long it takes to resume a failed
136 | transaction.
137 |
138 | ### Rollback
139 |
140 | A common need may be to rollback a transaction, either because it has
141 | been cancelled or because it can never succeed (e.g. account B is
142 | closed).
143 |
144 | Two cases:
145 |
146 | - after step 3, the transaction is considered committed and should not
147 | be rolled back. Instead, to undo the transaction, a new transaction
148 | can be created with an opposite source and destination.
149 |
150 | - after step 1 and before step 3: the process below should be applied.
151 |
152 | **Step 1:** set the transaction's state to "canceling":
153 |
154 | <% code 'javascript' do %>
155 | foo:PRIMARY> db.transactions.update({_id: t._id}, {$set: {state: "canceling"}})
156 | <% end %>
157 |
158 | **Step 2:** undo the transaction from accounts:
159 |
160 | <% code 'javascript' do %>
161 | foo:PRIMARY> db.accounts.update({name: t.source, pendingTransactions: t._id}, {$inc: {balance: t.value}, $pull: {pendingTransactions: t._id}})
162 | foo:PRIMARY> db.accounts.update({name: t.destination, pendingTransactions: t._id}, {$inc: {balance: -t.value}, $pull: {pendingTransactions: t._id}})
163 | foo:PRIMARY> db.accounts.find()
164 | { "_id" : ObjectId("4d7bc97fb8a04f5126961523"), "balance" : 1000, "name" : "A", "pendingTransactions" : [ ] }
165 | { "_id" : ObjectId("4d7bc984b8a04f5126961524"), "balance" : 1000, "name" : "B", "pendingTransactions" : [ ] }
166 | <% end %>
167 |
168 | **Step 3:** set the transaction's state to "cancelled":
169 |
170 | <% code 'javascript' do %>
171 | foo:PRIMARY> db.transactions.update({_id: t._id}, {$set: {state: "cancelled"}})
172 | <% end %>
173 |
174 | ### Multiple applications
175 |
176 | A common issue that exists with any DBs is how to make it safe for
177 | several applications to run transactions. It is important that only 1
178 | application handles a given transaction at one point in time, because
179 | otherwise conflicts can happen.
180 |
181 | One example is:
182 |
183 | - application A1 and A2 both grab transaction T1 which is in "initial"
184 | state.
185 |
186 | - A1 applies the whole transaction before A2 starts
187 |
188 | - A2 applies transaction a 2nd time because it does not appear as
189 | pending in the accounts
190 |
191 | To handle multiple applications, there should be a marker at the
192 | transaction level that the transaction is being handled. One can use
193 | findAndModify:
194 |
195 | <% code 'javascript' do %>
196 | foo:PRIMARY> t = db.transactions.findAndModify({query: {state: "initial", application: {$exists: 0}}, update: {$set: {state: "pending", application: "A1"}}, new: true})
197 | {
198 | "_id" : ObjectId("4d7be8af2c10315c0847fc85"),
199 | "application" : "A1",
200 | "destination" : "B",
201 | "source" : "A",
202 | "state" : "pending",
203 | "value" : 150
204 | }
205 | <% end %>
206 |
207 | The only remaining issue is if application A1 dies during transaction
208 | execution. The resume processes described in "Failure scenarios" can be
209 | applied, but application should make sure it owns the
210 | transactions. For example to resume pending jobs, query should be:
211 |
212 | <% code 'javascript' do %>
213 | foo:PRIMARY> db.transactions.find({application: "A1", state: "pending"})
214 | { "_id" : ObjectId("4d7be8af2c10315c0847fc85"), "application" : "A1", "destination" : "B", "source" : "A", "state" : "pending", "value" : 150 }
215 | <% end %>
216 |
217 | ### Proper two-phase commit
218 |
219 | This implementation tries to be simple on purpose, it assumes that:
220 |
221 | - an account operation can always be rolled back
222 |
223 | - the account balance can go negative
224 |
225 | A proper real world implementation would probably differ:
226 |
227 | - accounts have both a current balance, pending credits, pending
228 | debits.
229 |
230 | - during step 2, application makes sure accounts has sufficient funds
231 | for transaction, modifies credits/debits and adds transaction as
232 | pending, all in one update.
233 |
234 | - during step 4, application actually applies the transaction on
235 | balance, modifies credits/debits and removes transaction from
236 | pending, all in one update.
237 |
238 | ### Additional notes:
239 |
240 | In the context of important transactions, you will probably want to use:
241 |
242 | - reasonable "getLastError" to check that operations are actually
243 | written to the DB (see "getLastError" or "write concern" for your
244 | drivers).
245 |
246 | - durability so that operations are consistently saved on disk when an
247 | operation returns successfully.
248 |
--------------------------------------------------------------------------------
/content/operations/convert-replica-set-to-replicated-shard-cluster.txt:
--------------------------------------------------------------------------------
1 | ---
2 | title: Converting a Replica Set to a Replicated Shard Cluster
3 | created_at: 2011-11-30
4 | recipe: true
5 | author: Marc Bastien
6 | filter:
7 | - erb
8 | - markdown
9 | ---
10 |
11 | ## Overview
12 |
13 | This tutorial documents the process for converting a single 3-member
14 | replica set to a shard cluster that consists of 2 shards. Each shard
15 | will consist of an independent 3-member replica set.
16 |
17 | The procedure that follows uses a test environment running on a local
18 | system (i.e. localhost) and has been tested. You should feel
19 | encouraged to "follow along at home." In a production environment or
20 | one with multiple systems, use the same process except where noted.
21 |
22 | In brief, the process is as follows:
23 |
24 | 1. Create or select an existing 3-member replica set, and insert
25 | some data into a collection.
26 |
27 | 2. Start the config servers and create a shard cluster with a single
28 | shard.
29 |
30 | 3. Create a second replica set with three new `mongod` processes.
31 |
32 | 4. Add the second replica set to the sharded cluster.
33 |
34 | 5. Enable sharding on the desired collection or collections.
35 |
36 | ## Process
37 |
38 | ### 1. Set up a Three Member Replica Set and Insert Test Data
39 |
40 | #### 1.1. Create Directories for First Replica Set Instance
41 |
42 | Create the following data directories for the members of the
43 | first replica set, named firstset:
44 |
45 | - `/data/example/firstset1`
46 | - `/data/example/firstset2`
47 | - `/data/example/firstset3`
48 |
49 | #### 1.2. Start Three `mongod` instances
50 |
51 | Run each command in a separate terminal window or GNU Screen window.
52 |
53 | <% code 'javascript' do %>
54 | $ bin/mongod --dbpath /data/example/firstset1 --port 10001 --replSet firstset --oplogSize 700 --rest
55 | <% end %>
56 |
57 | <% code 'javascript' do %>
58 | $ bin/mongod --dbpath /data/example/firstset2 --port 10002 --replSet firstset --oplogSize 700 --rest
59 | <% end %>
60 |
61 | <% code 'javascript' do %>
62 | $ bin/mongod --dbpath /data/example/firstset3 --port 10003 --replSet firstset --oplogSize 700 --rest
63 | <% end %>
64 |
65 | **Note:** Here, the "`--oplogSize 700`" option restricts the size of
66 | the operation log (i.e. oplog) for each `mongod` process to
67 | 700MB. Without the `--oplogSize` option, each `mongod` will reserve
68 | approximately 5% of the free disk space on the volume. By limiting the
69 | size of the oplog, each process will start more quickly. Omit this setting
70 | in production environments.
71 |
72 | #### 1.3 Connect to One MongoDB Instance with `mongo` shell
73 |
74 | Run the following command in a new terminal to connect to a node.
75 |
76 | <% code 'javascript' do %>
77 | $ bin/mongo localhost:10001/admin
78 | MongoDB shell version: 2.0.2-rc1
79 | connecting to: localhost:10001/admin
80 | >
81 | <% end %>
82 |
83 | **Note:** Above and hereafter, if you are running in a production
84 | environment or are testing this process with `mongod` instances on
85 | multiple systems replace "localhost" with a resolvable domain,
86 | hostname, or the IP address of your system.
87 |
88 | #### 1.4. Initialize the First Replica Set
89 |
90 | <% code 'javascript' do %>
91 | > db.runCommand({"replSetInitiate" : {"_id" : "firstset", "members" : [{"_id" : 1, "host" : "localhost:10001"}, {"_id" : 2, "host" : "localhost:10002"}, {"_id" : 3, "host" : "localhost:10003"}]}})
92 | {
93 | "info" : "Config now saved locally. Should come online in about a minute.",
94 | "ok" : 1
95 | }
96 | <% end %>
97 |
98 | #### 1.5. Create and Populate a New Collection
99 |
100 | The following JavaScript writes one million documents to the
101 | collection "`test_collection`" in the following form:
102 |
103 | <% code 'javascript' do %>
104 | { "_id" : ObjectId("4ed5420b8fc1dd1df5886f70"), "name" : "Greg", "user_id" : 4, "boolean" : true, "added_at" : ISODate("2011-11-29T20:35:23.121Z"), "number" : 74 }
105 | <% end %>
106 |
107 | Use the following sequence of operations from the `mongo` prompt.
108 |
109 | <% code 'javascript' do %>
110 | PRIMARY> use test
111 | switched to db test
112 | PRIMARY> people = ["Marc", "Bill", "George", "Eliot", "Matt", "Trey", "Tracy", "Greg", "Steve", "Kristina", "Katie", "Jeff"];
113 | PRIMARY> for(var i=0; i<1000000; i++){
114 | name = people[Math.floor(Math.random()*people.length)];
115 | user_id = i;
116 | boolean = [true, false][Math.floor(Math.random()*2)];
117 | added_at = new Date();
118 | number = Math.floor(Math.random()*10001);
119 | db.test_collection.save({"name":name, "user_id":user_id, "boolean": boolean, "added_at":added_at, "number":number });
120 | }
121 | <% end %>
122 |
123 | Creating and fully replicating one million documents in the `mongo`
124 | shell may take several minutes depending on your system.
125 |
126 | ### 2. Start the "config" Instances and Create a Cluster with a Single Shard
127 |
128 | **Note:** For development and testing environments, a single config
129 | server is sufficient; in production environments, use three config
130 | servers. Because config instances only store the *metadata* for the
131 | shard cluster, they have minimal resource requirements.
132 |
133 | These instructions specify creating three config servers.
134 |
135 | #### 2.1. Create Directories for Config Instances
136 |
137 | Create the following data directories for each of the config
138 | instances:
139 |
140 | - `/data/example/config1`
141 | - `/data/example/config2`
142 | - `/data/example/config3`
143 |
144 | #### 2.2. Start the config Servers
145 |
146 | Run each command in a separate terminal window or GNU Screen window.
147 |
148 | <% code 'javascript' do %>
149 | $ bin/mongod --configsvr --dbpath /data/example/config1 --port 20001
150 | <% end %>
151 |
152 | <% code 'javascript' do %>
153 | $ bin/mongod --configsvr --dbpath /data/example/config2 --port 20002
154 | <% end %>
155 |
156 | <% code 'javascript' do %>
157 | $ bin/mongod --configsvr --dbpath /data/example/config3 --port 20003
158 | <% end %>
159 |
160 | #### 2.3. Start `mongos`
161 |
162 | Run the following command to start a `mongos` instance. Run this
163 | command in a new terminal window or GNU Screen window.
164 |
165 | <% code 'javascript' do %>
166 | $ bin/mongos --configdb localhost:20001,localhost:20002,localhost:20003 --port 27017 --chunkSize 1
167 | <% end %>
168 |
169 | **Note:** If you are using the collection created earlier, or are
170 | just experimenting with sharding, you can use a small `--chunkSize`
171 | (1MB works well). The default chunkSize of 64MB means that your cluster
172 | will need to have 64MB of data before MongoDB's automatic sharding
173 | begins working. In production environments, do not use a small shard
174 | size.
175 |
176 | The `configdb` option specifies the *configuration servers*
177 | (e.g. `localhost:20001`, `localhost:20002`, and `localhost:20003`). The
178 | `mongos` process runs on the default "MongoDB" port (i.e. `27017`),
179 | while the databases themselves, in this example, are running on ports in the
180 | `10001` series. In the above example, since `27017` is the default
181 | port, the option "`--port 27017`" may be omitted. It is included here
182 | only as an example.
183 |
184 | #### 2.4. Add the first shard in `mongos`
185 |
186 | In a new terminal window or GNU Screen session, add the first
187 | shard, using the following procedure:
188 |
189 | <% code 'javascript' do %>
190 | $ bin/mongo localhost:27017/admin
191 | MongoDB shell version: 2.0.2-rc1
192 | connecting to: localhost:27017/admin
193 | mongos> db.runCommand( { addshard : "firstset/localhost:10001,localhost:10002,localhost:10003" } )
194 | { "shardAdded" : "firstset", "ok" : 1 }
195 | mongos>
196 | <% end %>
197 |
198 | ### 3. Create a second replica set with three new mongod processes
199 |
200 | #### 3.1. Create Directories for Second Replica Set Instance
201 |
202 | Create the following data directories for the members of the
203 | second replica set, named secondset:
204 |
205 | - `/data/example/secondset1`
206 | - `/data/example/secondset2`
207 | - `/data/example/secondset3`
208 |
209 | #### 3.2. Start three instances of mongod in three new terminal windows
210 |
211 | <% code 'javascript' do %>
212 | $ bin/mongod --dbpath /data/example/secondset1 --port 10004 --replSet secondset --oplogSize 700 --rest
213 | <% end %>
214 |
215 | <% code 'javascript' do %>
216 | $ bin/mongod --dbpath /data/example/secondset2 --port 10005 --replSet secondset --oplogSize 700 --rest
217 | <% end %>
218 |
219 | <% code 'javascript' do %>
220 | $ bin/mongod --dbpath /data/example/secondset3 --port 10006 --replSet secondset --oplogSize 700 --rest
221 | <% end %>
222 |
223 | NOTE: As in 1.2, this set uses the smaller `oplogSize`
224 | configuration. Omit this setting in production environments.
225 |
226 | #### 3.3. Connect to One MongoDB Instance with `mongo` shell
227 |
228 | <% code 'javascript' do %>
229 | $ bin/mongo localhost:10004/admin
230 | MongoDB shell version: 2.0.2-rc1
231 | connecting to: localhost:10004/admin
232 | >
233 | <% end %>
234 |
235 | #### 3.4. Initialize the Second Replica Set
236 |
237 | <% code 'javascript' do %>
238 | > db.runCommand({"replSetInitiate" : {"_id" : "secondset", "members" : [{"_id" : 1, "host" : "localhost:10004"}, {"_id" : 2, "host" : "localhost:10005"}, {"_id" : 3, "host" : "localhost:10006"}]}})
239 | {
240 | "info" : "Config now saved locally. Should come online in about a minute.",
241 | "ok" : 1
242 | }
243 | <% end %>
244 |
245 | ### 4. Add the Second Replica Set to the Shard Cluster
246 |
247 | In a connection to the `mongos` instance (created above), follow the
248 | below procedure.
249 |
250 | <% code 'javascript' do %>
251 | mongos> use admin
252 | switched to db admin
253 | mongos> db.runCommand( { addshard : "secondset/localhost:10004,localhost:10005,localhost:10006" } )
254 | { "shardAdded" : "secondset", "ok" : 1 }
255 | <% end %>
256 |
257 | You can verify that both shards are properly configured by running the
258 | `listshards` command. View this and example output below:
259 |
260 | <% code 'javascript' do %>
261 | mongos> db.runCommand({listshards:1})
262 | {
263 | "shards" : [
264 | {
265 | "_id" : "firstset",
266 | "host" : "firstset/localhost:10001,localhost:10003,localhost:10002"
267 | },
268 | {
269 | "_id" : "secondset",
270 | "host" : "secondset/localhost:10004,localhost:10006,localhost:10005"
271 | }
272 | ],
273 | "ok" : 1
274 | }
275 | <% end %>
276 |
277 | ### 5. Enable Sharding
278 |
279 | Sharding in MongoDB must be enabled on *both* the database and
280 | collection levels.
281 |
282 | #### 5.1. Enabling Sharding on the Database Level
283 |
284 | Issue the `enablesharding` command. The "`test`" argument specifies
285 | the name of the database. See the following example:
286 |
287 | <% code 'javascript' do %>
288 | mongos> db.runCommand( { enablesharding : "test" } )
289 | { "ok" : 1 }
290 | <% end %>
291 |
292 | #### 5.2. Create an Index on the Shard Key
293 |
294 | Create an index on the shard key. The shard key is used by MongoDB to
295 | distribute documents between shards. Once selected the shard key
296 | cannot be changed. Good shard keys:
297 |
298 | - will have values that are evenly distributed among all documents,
299 | - group documents that are likely to be accessed at the same time in
300 | contiguous chunks, and
301 | - allow for effective distribution of activity among shards.
302 |
303 | Typically shard keys are compound, comprising some sort of hash and
304 | some sort of other primary key. Selecting a shard key depends on your
305 | data set, application architecture, and usage pattern, and is beyond
306 | the scope of this document. For the purposes of this example, we will
307 | shard the "number" key in the data inserted above. This would
308 | typically not be a good shard key for production deployments.
309 |
310 | Create the index with the following procedure:
311 |
312 | <% code 'javascript' do %>
313 | mongos> use test
314 | switched to db test
315 | mongos> db.test_collection.ensureIndex({number:1})
316 | <% end %>
317 |
318 | #### 5.3. Shard the Collection
319 |
320 | Issue the following command to shard the collection:
321 |
322 | <% code 'javascript' do %>
323 | mongos> use admin
324 | switched to db admin
325 | mongos> db.runCommand( { shardcollection : "test.test_collection", key : {"number":1} })
326 | { "collectionsharded" : "test.test_collection", "ok" : 1 }
327 | mongos>
328 | <% end %>
329 |
330 | The collection "`test_collection`" is now sharded!
331 |
332 | Over the next few minutes the Balancer will begin to redistribute
333 | chunks of documents. You can confirm this activity by switching to the
334 | `test` database and running `db.stats()` or `db.printShardingStatus()`.
335 |
336 | Additional documents that are added to this collection will be distributed evenly between the shards.
337 |
338 | See the following examples:
339 |
340 | <% code 'javascript' do %>
341 | mongos> use test
342 | switched to db test
343 | mongos> db.stats()
344 | {
345 | "raw" : {
346 | "firstset/localhost:10001,localhost:10003,localhost:10002" : {
347 | "db" : "test",
348 | "collections" : 3,
349 | "objects" : 973887,
350 | "avgObjSize" : 100.33173458522396,
351 | "dataSize" : 97711772,
352 | "storageSize" : 141258752,
353 | "numExtents" : 15,
354 | "indexes" : 2,
355 | "indexSize" : 56978544,
356 | "fileSize" : 1006632960,
357 | "nsSizeMB" : 16,
358 | "ok" : 1
359 | },
360 | "secondset/localhost:10004,localhost:10006,localhost:10005" : {
361 | "db" : "test",
362 | "collections" : 3,
363 | "objects" : 26125,
364 | "avgObjSize" : 100.33286124401914,
365 | "dataSize" : 2621196,
366 | "storageSize" : 11194368,
367 | "numExtents" : 8,
368 | "indexes" : 2,
369 | "indexSize" : 2093056,
370 | "fileSize" : 201326592,
371 | "nsSizeMB" : 16,
372 | "ok" : 1
373 | }
374 | },
375 | "objects" : 1000012,
376 | "avgObjSize" : 100.33176401883178,
377 | "dataSize" : 100332968,
378 | "storageSize" : 152453120,
379 | "numExtents" : 23,
380 | "indexes" : 4,
381 | "indexSize" : 59071600,
382 | "fileSize" : 1207959552,
383 | "ok" : 1
384 | }
385 | mongos> db.printShardingStatus()
386 | --- Sharding Status ---
387 | sharding version: { "_id" : 1, "version" : 3 }
388 | shards:
389 | { "_id" : "firstset", "host" : "firstset/localhost:10001,localhost:10003,localhost:10002" }
390 | { "_id" : "secondset", "host" : "secondset/localhost:10004,localhost:10006,localhost:10005" }
391 | databases:
392 | { "_id" : "admin", "partitioned" : false, "primary" : "config" }
393 | { "_id" : "test", "partitioned" : true, "primary" : "firstset" }
394 | test.test_collection chunks:
395 | secondset 5
396 | firstset 186
397 | too many chunks to print, use verbose if you want to force print
398 |
399 | mongos> db.stats()
400 | {
401 | "raw" : {
402 | "firstset/localhost:10001,localhost:10003,localhost:10002" : {
403 | "db" : "test",
404 | "collections" : 3,
405 | "objects" : 910960,
406 | "avgObjSize" : 100.33197066830596,
407 | "dataSize" : 91398412,
408 | "storageSize" : 141258752,
409 | "numExtents" : 15,
410 | "indexes" : 2,
411 | "indexSize" : 55400576,
412 | "fileSize" : 1006632960,
413 | "nsSizeMB" : 16,
414 | "ok" : 1
415 | },
416 | "secondset/localhost:10004,localhost:10006,localhost:10005" : {
417 | "db" : "test",
418 | "collections" : 3,
419 | "objects" : 89052,
420 | "avgObjSize" : 100.32942550419979,
421 | "dataSize" : 8934536,
422 | "storageSize" : 11194368,
423 | "numExtents" : 8,
424 | "indexes" : 2,
425 | "indexSize" : 7178528,
426 | "fileSize" : 201326592,
427 | "nsSizeMB" : 16,
428 | "ok" : 1
429 | }
430 | },
431 | "objects" : 1000012,
432 | "avgObjSize" : 100.33174401907178,
433 | "dataSize" : 100332948,
434 | "storageSize" : 152453120,
435 | "numExtents" : 23,
436 | "indexes" : 4,
437 | "indexSize" : 62579104,
438 | "fileSize" : 1207959552,
439 | "ok" : 1
440 | }
441 | mongos> db.printShardingStatus()
442 | --- Sharding Status ---
443 | sharding version: { "_id" : 1, "version" : 3 }
444 | shards:
445 | { "_id" : "firstset", "host" : "firstset/localhost:10001,localhost:10003,localhost:10002" }
446 | { "_id" : "secondset", "host" : "secondset/localhost:10004,localhost:10006,localhost:10005" }
447 | databases:
448 | { "_id" : "admin", "partitioned" : false, "primary" : "config" }
449 | { "_id" : "test", "partitioned" : true, "primary" : "secondset" }
450 | test.test_collection chunks:
451 | secondset 17
452 | firstset 174
453 | too many chunks to print, use verbose if you want to force print
454 | mongos>
455 | <% end %>
456 |
457 | The above demonstrates that chunks are migrated to the shard on
458 | replica set "secondset" over time.
459 |
--------------------------------------------------------------------------------