├── .gitignore ├── package.json ├── server.js └── README.md /.gitignore: -------------------------------------------------------------------------------- 1 | *.DS_Store 2 | node_modules -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "mongo-search", 3 | "version": "0.0.1", 4 | "description": "Full-text Search using MongoDB Native Nodejs", 5 | "main": "server.js", 6 | "dependencies": { 7 | "mongodb": "~1.3.23", 8 | "twitter": "~0.2.9" 9 | }, 10 | "devDependencies": {}, 11 | "scripts": { 12 | "test": "node server", 13 | "start": "node server.js" 14 | }, 15 | "repository": { 16 | "type": "git", 17 | "url": "git://github.com/ideaq/mongo-search.git" 18 | }, 19 | "keywords": [ 20 | "Mongodb", 21 | "Native", 22 | "Full-text", 23 | "Search", 24 | "Made", 25 | "Easy" 26 | ], 27 | "author": "@nelsonic & friends", 28 | "license": "MIT", 29 | "bugs": { 30 | "url": "https://github.com/ideaq/mongo-search/issues" 31 | }, 32 | "homepage": "https://github.com/ideaq/mongo-search" 33 | } 34 | -------------------------------------------------------------------------------- /server.js: -------------------------------------------------------------------------------- 1 | // keywords to find in tweets 2 | var KEYWORDS = "learned, learnt, homework, science, math, maths, physics, chemistry"; // add keywords separated by spaces. 3 | // KEYWORDS = "katie, justin, kim, beyonce, 1DWorld, OMG, FML, news, breaking"; // for *LOTS* of tweets. 
4 | // KEYWORDS = "idea"; 5 | var twitter = require('twitter'), 6 | twit = new twitter({ 7 | consumer_key: 'U8N2QzFu6Hv4BB3BjObIy9HDF', 8 | consumer_secret: 'rJWtj5NneVWmfT8STB7YN6IBkLreke9JoJhP3nIe0ffnBq91Xv', 9 | access_token_key: '2389016353-4tCDaVgRFkkNsWOj1sb6fZQ8s0bINqD5jJGmqRC', 10 | access_token_secret: 'OEFnemh9FlSkOX5YuNP46XsDh3EutbHiiKq6q8wV2Pwko' 11 | }); 12 | 13 | var SEARCH_INDEX = "post_search_index"; 14 | var SEARCH_KEYWORDS = "math", 15 | _db, _sr, _posts; // global DB handles 16 | 17 | var MongoClient = require('mongodb').MongoClient; 18 | 19 | MongoClient.connect('mongodb://127.0.0.1:27017/meteor', function(err, db) { 20 | if(err) throw err; 21 | _posts = db.collection('posts'); 22 | _sr = db.collection('search_results'); 23 | _db = db; // export the database handle 24 | fetchTweets(_posts); 25 | 26 | // wait 10 seconds for some data then create full-text index 27 | // setTimeout(function(){ 28 | // createIndex(_posts); 29 | // },10000) 30 | 31 | // search('science'); 32 | }) // end MongoClient 33 | 34 | // 35 | function search(keywords){ 36 | console.log("- - - > SEARCHING for ",keywords, " < - - - "); 37 | _db.command({text:"posts" , search: keywords }, function(err, res){ 38 | if(err) console.log(err); 39 | 40 | var record = {}; 41 | record.keywords = keywords; 42 | record.last_updated = new Date(); 43 | record.posts = []; 44 | 45 | if (res.results && res.results.length > 0){ 46 | console.log("EXAMPLE:",res.results[0]); 47 | 48 | for(var i in res.results){ 49 | // console.log(i, res.results[i].score, res.results[i].obj._id); 50 | record.posts.push({ 51 | "_id":res.results[i].obj._id.toString(), 52 | "score":res.results[i].score 53 | }); 54 | } 55 | 56 | // check if an SR record already exists for this keyword 57 | _sr.findOne({"keywords":keywords}, function(err, items) { 58 | if(err) console.log(err); 59 | console.log(items); 60 | if(items && items._id){ 61 | record._id = items._id; 62 | // upsert the results record 63 | _sr.update(record, { 
upsert: true }, function(err,info){ 64 | if(err) console.log(err); 65 | // console.log("INFO",info); 66 | }); 67 | } else { 68 | // insert new search results record 69 | _sr.insert(record, function(err,info){ 70 | if(err) console.log(err); 71 | console.log("INFO",info); 72 | }); 73 | } 74 | 75 | }) // end findOne (search results lookup for keywords) 76 | } else { // no search results 77 | console.log('no results'); 78 | _sr.insert(record, function(err,info){ 79 | if(err) console.log(err); 80 | console.log("INFO",info); 81 | }); 82 | } 83 | console.log("- - - > FOUND Results for ",keywords, " < - - - "); 84 | }); // end command (search) 85 | } 86 | 87 | 88 | function createIndex(collection) { 89 | collection.indexInformation(function(err, index) { // all indexes on posts collection 90 | // console.dir(index); 91 | // console.log(typeof index) 92 | if(typeof index[SEARCH_INDEX] === 'undefined'){ 93 | // create index 94 | collection.ensureIndex( { text: 'text' }, { 95 | name: SEARCH_INDEX, 96 | background:true 97 | }, function(err, info){ 98 | if(err) throw err; 99 | // console.dir(info); 100 | }); 101 | } 102 | }); 103 | } 104 | 105 | function fetchTweets(collection){ 106 | // console.log(twit); 107 | twit.stream("statuses/filter", { track: KEYWORDS, 'lang':'en' }, function(stream) { 108 | stream.on('data', function(data) { 109 | var tweet = extractTweet(data); 110 | collection.insert(tweet, function(err, docs) { 111 | 112 | collection.count(function(err, count) { 113 | console.log(count, tweet.user, tweet.text); 114 | }); 115 | 116 | }); // end collection.insert 117 | }); // end stream.on 118 | }); // end twit.stream 119 | } 120 | 121 | 122 | function extractTweet(data) { 123 | var tweet = {}; 124 | tweet.text = data.text; 125 | tweet.time = new Date(Date.parse(data.created_at)); // date objecte sortable 126 | tweet.avatar = data.user && data.user.profile_image_url || ''; 127 | // console.log(data.user.screen_name); 128 | tweet.user = data.user.screen_name 129 | // 
extract images where available: 130 | if(data.entities && data.entities.media && data.entities.media[0].media_url){ 131 | // console.log(data.entities.media[0].media_url); 132 | tweet.img = data.entities.media[0].media_url; 133 | 134 | } 135 | if(data.retweeted_status && parseInt(data.retweeted_status.retweet_count, 10) > 0){ 136 | // console.log(data) 137 | } 138 | // console.log(data.text) 139 | // if(data.lang === 'en') { // && tweet.img) { 140 | if(tweet.text.indexOf("#") !== -1) { 141 | // insertTweet(tweet); 142 | } 143 | tweet.lang = data.lang; 144 | return tweet; 145 | } -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | MongoDB Search 2 | ============== 3 | 4 | Experiment with Full-Text-Search using Node.js and MongoDB 5 | 6 | ## WHY 7 | 8 | Finding relevance in an ocean of content. 9 | 10 | Having spent the last couple of days 11 | [*wrestling* with Meteor + Search](https://github.com/ideaq/meteor-search) 12 | trying to get it let me search through a collection 13 | I've decided to take a different approach to the probrem. 14 | 15 | Hopefully by the end of this exercise we will be able to search through 16 | a collection of "posts" (tweets) and find relevant results. 17 | 18 | ## WHAT 19 | 20 | ***Full-text search*** *without* having to manage *Solr or ElasticSearch*. 21 | (keeping it simple with just *one* data store) 22 | 23 | 24 | ## HOW 25 | 26 | ### Ensure You have MongoDB Installed 27 | 28 | see: https://www.mongodb.com/docs/manual/installation/ 29 | 30 | ### Going Native 31 | 32 | In the past I've used [Mongoose](https://mongoosejs.com/) to interact with 33 | MongoDB. Mongoose offers many great abstractions when building applications 34 | specifically around pre-defining models and providing constructors to 35 | validate fields on insert/update. We don't need that here. 
36 | 37 | All we need is the ability to: 38 | 39 | - [x] Connect to MongoDB (Local) 40 | - [x] Insert records into a collection 41 | - [x] Create a text index for the text field in the collection 42 | - [x] Execute a text (search) query against the records in the collection 43 | - [x] Return a list of all the record (_ids) that match the search criteria 44 | - [x] Store the results in a "results" collection to speed up future searches 45 | 46 | For these simple tasks the ***Node MongoDB Native*** client is *perfect*. 47 | 48 | - https://github.com/mongodb/node-mongodb-native 49 | 50 | Install the node-mongodb-native node module using NPM: 51 | 52 | ``` 53 | npm install mongodb 54 | ``` 55 | 56 | #### Startup The Meteor Mongo DB 57 | 58 | Use this command to start Mongo with **textSearchEnabled**: 59 | 60 | mongod --bind_ip 127.0.0.1 --dbpath ~/code/meteor-search/.meteor/local/db --setParameter textSearchEnabled=true 61 | 62 | 63 | ### Create the Index 64 | 65 | ``` 66 | MongoClient.connect('mongodb://127.0.0.1:27017/meteor', function(err, db) { 67 | if(err) throw err; 68 | var posts = db.collection('posts'); 69 | posts.indexInformation(function(err, info) { // all indexes on posts collection 70 | console.dir(info); 71 | }); 72 | }) // end MongoClient 73 | 74 | ``` 75 | Output: 76 | ``` 77 | { 78 | _id_: [ [ '_id', 1 ] ], 79 | post_search_index: [ [ '_fts', 'text' ], [ '_ftsx', 1 ] ] 80 | } 81 | ``` 82 | *ignore* the _id_ index that's a mongodb default. 83 | the one which interests us is the post_search_index we created earlier. 84 | 85 | - https://mongodb.github.io/node-mongodb-native/markdown-docs/indexes.html 86 | 87 | 88 | ### Searching the Data 89 | 90 | ``` 91 | db.posts.find({}).sort({time:-1}).limit(100); 92 | ``` 93 | 94 | Node.js MongoDB Native *does not have* **runCommand** which is used in most 95 | full-text search examples.
So we *cannot* just do: 96 | 97 | ```javascript 98 | db.posts.runCommand( "text", { search: "justin" } ) 99 | ``` 100 | 101 | ![MongoDB Native NO runCommand](https://i.imgur.com/5LKPFNE.png) 102 | 103 | But a bit of investigation yields: 104 | 105 | ```javascript 106 | // unintuitively the text field is actually the collection! 107 | db.command({text:"posts" , search: "maths science" }, function(err, cb){ 108 | console.log(cb.results); 109 | }); 110 | ``` 111 | 112 | ### Storing Search Results 113 | 114 | The result of the above **db.command** search query has the format: 115 | 116 | ``` 117 | { score: 2.142857142857143, 118 | obj: 119 | { text: 'Math, science, history, unraveling the mystery it all started with a #BigBang💥', 120 | time: Sun Mar 30 2014 07:03:08 GMT+0100 (BST), 121 | avatar: 'https://pbs.twimg.com/profile_images/442935363095379968/CuEcmYsH_normal.jpeg', 122 | _id: 'Kxssadbi2e5X7ga5L' } }, 123 | { score: 2.142857142857143, 124 | obj: 125 | { text: 'I was just about to set my maths and science books on fire…#ihateschool', 126 | time: Sun Mar 30 2014 06:22:31 GMT+0100 (BST), 127 | avatar: 'https://pbs.twimg.com/profile_images/449715822454243329/cNN69E3A_normal.jpeg', 128 | _id: 'ghoi72BoEfswZgfws' } } 129 | ``` 130 | 131 | This returns the **score** (a *float*) and the entire record (all fields). 132 | We could return these results directly to the user and we are *done*. 133 | But going back to our original reason for diving into "*native*" node, 134 | we want to be able to "pipe" these results back into our Meteor app. 
135 | 136 | #### New Collection: Search Results 137 | 138 | 139 | ```javascript 140 | db.search_results.insert( 141 | { 142 | keywords:"science", 143 | posts : [ 144 | { 145 | score: 2.142857142857143, 146 | _id: 'Kxssadbi2e5X7ga5L' 147 | }, 148 | { 149 | score: 2.142857142857143, 150 | _id: 'ghoi72BoEfswZgfws' 151 | } 152 | ], 153 | last_updated: new Date() 154 | } 155 | ); 156 | ``` 157 | 158 | ### Displaying Search Results 159 | 160 | 161 | 162 | 163 | 164 | 165 | 166 | 167 | 168 | 169 | 170 | 171 | 172 | 173 | 174 | 175 | 176 | 177 | 178 | 179 | 180 | 181 | 182 | 183 | 184 | 185 | 186 | ## Further Reading 187 | 188 | - Searching MongoDB: https://www.mongodb.com/docs/manual/reference/operator/query/text/ 189 | - https://blog.mongohq.com/mongodb-and-full-text-search-my-first-week-with-mongodb-2-4-development-release/ 190 | - https://blog.serverdensity.com/full-text-search-in-mongodb 191 | - 12 Months with Mongo: https://blog.wordnik.com/12-months-with-mongodb 192 | - runCommand equivalent: https://stackoverflow.com/questions/16070233/runcommand-equivalent-for-nodejs-native-mongodb 193 | - If Mongoose was an option: https://stackoverflow.com/questions/19849650/full-text-search-in-mongodb-node-js-mongoose-text-search 194 | 195 | >> REPLY TO: https://stackoverflow.com/questions/17159626/implementing-mongodb-2-4s-full-text-search-in-a-meteor-app 196 | 197 | ### Prefer "Real" Search? 198 | 199 | - Feature comparison: https://solr-vs-elasticsearch.com/ 200 | - Discussion: https://stackoverflow.com/questions/10213009/solr-vs-elasticsearch 201 | - Bonsai (hosted ElasticSearch): https://www.bonsai.io/ 202 | --------------------------------------------------------------------------------