├── .gitignore ├── tests ├── vdm-data │ └── vdm-data-scanner.server.test.js └── api │ └── posts.server.api.js ├── api ├── models │ └── posts.server.model.js ├── routes │ └── posts.server.routes.js └── controllers │ └── posts.server.controller.js ├── scripts ├── vdm-data-remove.js └── vdm-data-transfer.js ├── package.json ├── config ├── express.js └── config.js ├── index.js ├── gruntfile.js ├── vdm-data ├── vdm-parser.js └── vdm-scanner.js └── README.md /.gitignore: -------------------------------------------------------------------------------- 1 | .idea/ 2 | node_modules/ 3 | results.txt -------------------------------------------------------------------------------- /tests/vdm-data/vdm-data-scanner.server.test.js: -------------------------------------------------------------------------------- 1 | 'use strict'; 2 | 3 | var should = require('should'), 4 | scanner = require('../../vdm-data/vdm-scanner.js'); 5 | 6 | describe('VDM Data Scann', function() { 7 | it('Should return the number of posts retrieved : 200', function(done) { 8 | this.timeout(20000); 9 | 10 | scanner.getVDMPosts().then(function(posts) { 11 | (posts.length).should.be.exactly(200); 12 | 13 | done(); 14 | }); 15 | }); 16 | }); -------------------------------------------------------------------------------- /api/models/posts.server.model.js: -------------------------------------------------------------------------------- 1 | 'use strict'; 2 | 3 | /** 4 | * Module dependencies. 5 | */ 6 | var mongoose = require('mongoose'), 7 | Schema = mongoose.Schema; 8 | 9 | /** 10 | * Post Schema 11 | */ 12 | var PostSchema = new Schema({ 13 | content: { 14 | type: String, 15 | trim: true 16 | }, 17 | author: { 18 | type: String, 19 | trim: true 20 | }, 21 | date: { 22 | type: Date, 23 | trim: true 24 | } 25 | }); 26 | 27 | mongoose.model('Post', PostSchema); -------------------------------------------------------------------------------- /scripts/vdm-data-remove.js: -------------------------------------------------------------------------------- 1 | 'use strict'; 2 | 3 | var config = require('../config/config.js'), 4 | chalk = require('chalk'), 5 | request = require('request'); 6 | 7 | /** 8 | * Remove VDM posts 9 | */ 10 | request.delete( 11 | 'http://localhost:' + config.port + '/api/posts', 12 | function (error, response) { 13 | if (!error && response.statusCode == 200) { 14 | console.log(chalk.green('All posts were correctly removed from the database.')); 15 | } else { 16 | console.log(chalk.red('Error on posts removal. Make sure that the REST api is available.')); 17 | } 18 | } 19 | ); -------------------------------------------------------------------------------- /api/routes/posts.server.routes.js: -------------------------------------------------------------------------------- 1 | 'use strict'; 2 | 3 | /** 4 | * Posts routes 5 | * @param app 6 | */ 7 | module.exports = function(app) { 8 | 9 | /** 10 | * Module dependencies. 11 | */ 12 | var posts = require('../controllers/posts.server.controller.js'); 13 | 14 | app.route('/api/posts') 15 | .get(posts.list) 16 | .post(posts.createList) 17 | .delete(posts.deleteAll); 18 | 19 | app.route('/api/posts/:postId') 20 | .get(posts.read); 21 | 22 | app.route('/api/create-one-post') 23 | .post(posts.create); 24 | 25 | // Finish by binding the Post middleware 26 | app.param('postId', posts.postByID); 27 | }; 28 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "VDMPosts", 3 | "version": "1.0.0", 4 | "description": "Application used to retrieve 200 last VDM posts and manage them through a REST API", 5 | "main": "index.js", 6 | "scripts": { 7 | "test": "echo \"Error: no test specified\" && exit 1" 8 | }, 9 | "author": "Franck Abgrall", 10 | "license": "ISC", 11 | "dependencies": { 12 | "body-parser": "^1.15.2", 13 | "chalk": "^1.1.3", 14 | "express": "^4.13.4", 15 | "glob": "^7.1.0", 16 | "grunt": "^1.0.1", 17 | "grunt-exec": "^1.0.1", 18 | "grunt-mocha-test": "^0.13.2", 19 | "http": "0.0.0", 20 | "jsdom": "^9.5.0", 21 | "lodash": "^4.16.2", 22 | "method-override": "^2.3.6", 23 | "mocha": "^3.1.0", 24 | "mongoose": "^4.6.1", 25 | "path": "^0.12.7", 26 | "q": "^1.4.1", 27 | "request": "^2.75.0", 28 | "should": "^11.1.0" 29 | } 30 | } 31 | -------------------------------------------------------------------------------- /config/express.js: -------------------------------------------------------------------------------- 1 | 'use strict'; 2 | 3 | var bodyParser = require('body-parser'), 4 | express = require('express'), 5 | config = require('./config.js'), 6 | app = express(), 7 | path = require('path'), 8 | methodOverride = require('method-override'); 9 | 10 | module.exports = function () { 11 | 12 | // Request body parsing middleware should be above methodOverride 13 | app.use(bodyParser.urlencoded({ 14 | extended: true 15 | })); 16 | app.use(bodyParser.json()); 17 | app.use(methodOverride()); 18 | 19 | // Globbing model files 20 | config.getGlobbedFiles('./api/models/**/*.js').forEach(function (modelPath) { 21 | require(path.resolve(modelPath)); 22 | }); 23 | 24 | // Globbing route files 25 | config.getGlobbedFiles('./api/routes/**/*.js').forEach(function (modelPath) { 26 | require(path.resolve(modelPath))(app); 27 | }); 28 | 29 | // Return Express server instance 30 | return app; 31 | }; -------------------------------------------------------------------------------- /index.js: -------------------------------------------------------------------------------- 1 | 'use strict'; 2 | 3 | var config = require('./config/config.js'), 4 | mongoose = require('mongoose'), 5 | chalk = require('chalk'); 6 | 7 | // Bootstrap db connection 8 | var db = mongoose.connect(config.db.uri, config.db.options, function(err) { 9 | if (err) { 10 | console.error(chalk.red('Could not connect to MongoDB!')); 11 | console.log(chalk.red(err)); 12 | } 13 | }); 14 | 15 | mongoose.connection.on('error', function(err) { 16 | console.error(chalk.red('MongoDB connection error: ' + err)); 17 | process.exit(-1); 18 | } 19 | ); 20 | 21 | // Init the express application 22 | var app = require('./config/express.js')(); 23 | 24 | // Init server 25 | var server = require('http').Server(app); 26 | server.listen(config.port); 27 | 28 | // Logging initialization 29 | console.log('-'); 30 | console.log(chalk.green('Port:\t\t\t\t' + config.port)); 31 | console.log(chalk.green('Database:\t\t\t' + config.db.uri)); -------------------------------------------------------------------------------- /scripts/vdm-data-transfer.js: -------------------------------------------------------------------------------- 1 | 'use strict'; 2 | 3 | var scanner = require('./../vdm-data/vdm-scanner.js'), 4 | _ = require('lodash'), 5 | q = require('q'), 6 | config = require('../config/config.js'), 7 | chalk = require('chalk'), 8 | request = require('request'); 9 | 10 | console.log('Scanning VDM website ...'); 11 | 12 | /** 13 | * Get VDM posts 14 | */ 15 | scanner.getVDMPosts().then(function (posts) { 16 | console.log(chalk.green('Data retrieved.')); 17 | 18 | request.post( 19 | 'http://localhost:' + config.port + '/api/posts', 20 | {form: {data: JSON.stringify(posts)}}, 21 | function (error, response) { 22 | if (!error && response.statusCode == 200) { 23 | console.log(chalk.green('Data inserted to Database.')); 24 | } else { 25 | console.log(chalk.red('Data insertion failed. Make sure that the REST api is available.')); 26 | } 27 | } 28 | ); 29 | }); 30 | 31 | -------------------------------------------------------------------------------- /gruntfile.js: -------------------------------------------------------------------------------- 1 | 'use strict'; 2 | 3 | module.exports = function (grunt) { 4 | grunt.initConfig({ 5 | exec: { 6 | getPosts: { 7 | command: 'node ./scripts/vdm-data-transfer.js', 8 | stdout: true, 9 | stderr: true 10 | }, 11 | deletePosts: { 12 | command: 'node ./scripts/vdm-data-remove.js', 13 | stdout: true, 14 | stderr: true 15 | }, 16 | serve: { 17 | command: 'node ./index.js', 18 | stdout: true, 19 | stderr: true 20 | } 21 | }, 22 | mochaTest: { 23 | test: { 24 | options: { 25 | reporter: 'spec', 26 | captureFile: 'results.txt', 27 | quiet: false, 28 | clearRequireCache: false, 29 | noFail: false 30 | }, 31 | src: ['tests/**/*.js'] 32 | } 33 | } 34 | }); 35 | 36 | grunt.loadNpmTasks('grunt-mocha-test'); 37 | grunt.loadNpmTasks('grunt-exec'); 38 | 39 | grunt.registerTask('get-posts', ['exec:getPosts']); 40 | grunt.registerTask('delete-posts', ['exec:deletePosts']); 41 | grunt.registerTask('serve', ['exec:serve']); 42 | grunt.registerTask('test', ['mochaTest']); 43 | }; 44 | 45 | -------------------------------------------------------------------------------- /config/config.js: -------------------------------------------------------------------------------- 1 | 'use strict'; 2 | 3 | /** 4 | * Module dependencies. 5 | */ 6 | var _ = require('lodash'), 7 | glob = require('glob'); 8 | 9 | /** 10 | * App config 11 | * @type {Function} 12 | */ 13 | module.exports = { 14 | port: process.env.PORT || 3000, 15 | db: { 16 | uri: 'mongodb://127.0.0.1/local' 17 | } 18 | }; 19 | 20 | /** 21 | * Get files by glob patterns 22 | */ 23 | module.exports.getGlobbedFiles = function(globPatterns, removeRoot) { 24 | // For context switching 25 | var _this = this; 26 | 27 | // URL paths regex 28 | var urlRegex = new RegExp('^(?:[a-z]+:)?\/\/', 'i'); 29 | 30 | // The output array 31 | var output = []; 32 | 33 | // If glob pattern is array so we use each pattern in a recursive way, otherwise we use glob 34 | if (_.isArray(globPatterns)) { 35 | globPatterns.forEach(function(globPattern) { 36 | output = _.union(output, _this.getGlobbedFiles(globPattern, removeRoot)); 37 | }); 38 | } else if (_.isString(globPatterns)) { 39 | if (urlRegex.test(globPatterns)) { 40 | output.push(globPatterns); 41 | } else { 42 | var files = glob(globPatterns, { sync: true }); 43 | if (removeRoot) { 44 | files = files.map(function(file) { 45 | return file.replace(removeRoot, ''); 46 | }); 47 | } 48 | output = _.union(output, files); 49 | } 50 | } 51 | 52 | return output; 53 | }; -------------------------------------------------------------------------------- /vdm-data/vdm-parser.js: -------------------------------------------------------------------------------- 1 | 'use strict'; 2 | 3 | /** 4 | * Extract content from post 5 | * @param data 6 | * @returns {*|XMLList} 7 | */ 8 | module.exports.extractContent = function (data) { 9 | // Get content 10 | var content = data.find('.content .fmllink').text(); 11 | 12 | return content; 13 | }; 14 | 15 | /** 16 | * Extract Author from post 17 | * @param data 18 | * @returns {string} 19 | */ 20 | module.exports.extractAuthor = function (data) { 21 | var footer = data.find('.date p').text(); 22 | var tmp = footer.substring(10); 23 | 24 | // Get author name 25 | var author = tmp.substring(0, tmp.indexOf('/')); 26 | 27 | return author; 28 | }; 29 | 30 | /** 31 | * Extract Date from post 32 | * @param data 33 | * @returns {Date} 34 | */ 35 | module.exports.extractDate = function (data) { 36 | var footer = data.find('.date p').text(); 37 | 38 | var tmp = footer.substring(footer.indexOf('/') + 2); 39 | 40 | // Get date (day, month, year) 41 | var date = tmp.substring(0, tmp.indexOf(' ')); 42 | var day = date.split('/')[0]; 43 | var month = date.split('/')[1]; 44 | var year = date.split('/')[2]; 45 | 46 | tmp = footer.substring(footer.indexOf('/') + 2); 47 | tmp = tmp.substring(tmp.indexOf(' à ') + 3); 48 | 49 | // Get hour and minute 50 | var hourAndMinutes = tmp.substring(0, tmp.indexOf(' ')); 51 | var hour = hourAndMinutes.split(':')[0]; 52 | var minute = hourAndMinutes.split(':')[1]; 53 | 54 | 55 | return new Date(year, month - 1, day, hour, minute); 56 | }; -------------------------------------------------------------------------------- /vdm-data/vdm-scanner.js: -------------------------------------------------------------------------------- 1 | 'use strict'; 2 | 3 | var jsdom = require('jsdom'), 4 | _ = require('lodash'), 5 | parser = require('./vdm-parser'), 6 | q = require('q'); 7 | 8 | /** 9 | * Scan a page from VDM website and add data to VDMPosts 10 | * @param VDMPosts 11 | * @param pageCount 12 | * @param task 13 | */ 14 | module.exports.launchScan = function(VDMPosts, pageCount, task) { 15 | jsdom.env( 16 | 'http://www.viedemerde.fr/?page=' + pageCount, 17 | ['http://code.jquery.com/jquery.js'], 18 | function (err, window) { 19 | var $ = window.$; 20 | 21 | /** 22 | * Iterate through each post and get their content, author and date 23 | */ 24 | $('.post.article').not('.is-picture').each(function () { 25 | 26 | if (VDMPosts.length < 200) 27 | VDMPosts.push({ 28 | content: parser.extractContent($(this)), 29 | author: parser.extractAuthor($(this)), 30 | date: parser.extractDate($(this)) 31 | }); 32 | else { 33 | return false; 34 | } 35 | 36 | }); 37 | 38 | // Resolve the current task 39 | task.resolve(); 40 | } 41 | ); 42 | }; 43 | 44 | /** 45 | * Retrieve posts from VDM 46 | */ 47 | module.exports.getVDMPosts = function () { 48 | var VDMPosts = []; 49 | var tasks = []; 50 | var deferred = q.defer(); 51 | var currTask = null; 52 | 53 | /** 54 | * We need 16 pages to get 200 posts 55 | */ 56 | for (var pageCount = 0; pageCount < 16; pageCount++) { 57 | 58 | // Create a list of tasks 59 | currTask = q.defer(); 60 | tasks.push(currTask); 61 | 62 | this.launchScan(VDMPosts, pageCount, currTask); 63 | } 64 | 65 | // Build a list of promises from tasks 66 | var promises = []; 67 | _(tasks).forEach(function(task) { 68 | promises.push(task.promise); 69 | }); 70 | 71 | // Resolve the promise when all tasks are completed 72 | q.all(promises).then(function() { 73 | deferred.resolve(VDMPosts); 74 | }); 75 | 76 | return deferred.promise; 77 | }; -------------------------------------------------------------------------------- /api/controllers/posts.server.controller.js: -------------------------------------------------------------------------------- 1 | 'use strict'; 2 | 3 | /** 4 | * Module dependencies. 5 | */ 6 | var mongoose = require('mongoose'), 7 | q = require('q'), 8 | Post = mongoose.model('Post'); 9 | 10 | /** 11 | * Create a Post 12 | */ 13 | exports.create = function(req, res) { 14 | var post = new Post(req.body); 15 | 16 | post.save(function(err) { 17 | if (err) { 18 | return res.status(400).send(err); 19 | } else { 20 | res.jsonp(post); 21 | } 22 | }); 23 | }; 24 | 25 | /** 26 | * Create list of Posts 27 | */ 28 | exports.createList = function(req, res) { 29 | var posts = JSON.parse(req.body.data); 30 | var tasks = []; 31 | 32 | for (var count = 0; count < posts.length; count++) { 33 | var post = new Post(posts[count]); 34 | tasks.push(post.save()); 35 | } 36 | 37 | q.all(tasks).then(function() { 38 | res.status(200).send(); 39 | }, function(err) { 40 | res.status(400).send(err); 41 | }); 42 | }; 43 | 44 | /** 45 | * List of posts 46 | */ 47 | exports.list = function(req, res) { 48 | var clause = {}; 49 | 50 | // Clause formating 51 | if (req.query.author) 52 | clause.author = req.query.author; 53 | if (req.query.from || req.query.to) 54 | clause.date = {}; 55 | if (req.query.from) 56 | clause.date.$gte = req.query.from; 57 | if (req.query.to) 58 | clause.date.$lt = req.query.to; 59 | 60 | Post.find(clause).exec(function(err, posts) { 61 | if (err) { 62 | return res.status(400).send(err); 63 | } else { 64 | res.jsonp({posts: posts, count: posts.length}); 65 | } 66 | }); 67 | }; 68 | 69 | /** 70 | * Show the current Post 71 | */ 72 | exports.read = function(req, res) { 73 | res.jsonp({post: req.post}); 74 | }; 75 | 76 | /** 77 | * Post middleware 78 | */ 79 | exports.postByID = function(req, res, next, id) { 80 | Post.findById(id).exec(function(err, post) { 81 | if (err) return res.status(400).send(err); 82 | if (! post) return next(new Error('Unable to find post with id : ' + id)); 83 | req.post = post ; 84 | next(); 85 | }); 86 | }; 87 | 88 | /** 89 | * Delete all posts 90 | */ 91 | exports.deleteAll = function(req, res) { 92 | Post.find().remove(function(err) { 93 | if (err) { 94 | return res.status(400).send(err); 95 | } else { 96 | return res.status(200).send(); 97 | } 98 | }); 99 | }; 100 | -------------------------------------------------------------------------------- /tests/api/posts.server.api.js: -------------------------------------------------------------------------------- 1 | 'use strict'; 2 | 3 | var should = require('should'), 4 | config = require('../../config/config.js'), 5 | request = require('request'); 6 | 7 | describe('CRUD tests', function () { 8 | 9 | /** 10 | * Clear the database before running tests 11 | */ 12 | before(function(done) { 13 | request.delete( 14 | 'http://localhost:' + config.port + '/api/posts', function () { 15 | done(); 16 | } 17 | ); 18 | }); 19 | 20 | it('Should be able to create a post', function (done) { 21 | var post = { 22 | content: 'content', 23 | author: 'franck', 24 | date: new Date(28, 11, 1993) 25 | }; 26 | 27 | request.post( 28 | 'http://localhost:' + config.port + '/api/create-one-post', {form: post}, 29 | function (error, response, body) { 30 | body = JSON.parse(body); 31 | should.exist(body._id); 32 | 33 | done(); 34 | } 35 | ); 36 | }); 37 | 38 | it('Should be able to find all posts', function (done) { 39 | request.get( 40 | 'http://localhost:' + config.port + '/api/posts', 41 | function (error, response, body) { 42 | body = JSON.parse(body); 43 | should.equal(body.posts.length, 1); 44 | 45 | done(); 46 | } 47 | ); 48 | }); 49 | 50 | it('Should be able to find post with author', function (done) { 51 | request.get( 52 | 'http://localhost:' + config.port + '/api/posts?author=franck', 53 | function (error, response, body) { 54 | body = JSON.parse(body); 55 | should.equal(body.posts[0].author, 'franck'); 56 | 57 | done(); 58 | } 59 | ); 60 | }); 61 | 62 | it('Should not be able to find a post because of the range of \'from\' and \'to\'', function (done) { 63 | request.get( 64 | 'http://localhost:' + config.port + '/api/posts?from=1994-01-01&to=2016-01-01', 65 | function (error, response, body) { 66 | body = JSON.parse(body); 67 | should.equal(body.posts.length, 0); 68 | 69 | done(); 70 | } 71 | ); 72 | }); 73 | 74 | /** 75 | * Clear the database after running tests 76 | */ 77 | after(function(done) { 78 | request.delete( 79 | 'http://localhost:' + config.port + '/api/posts', function () { 80 | done(); 81 | } 82 | ); 83 | }); 84 | }); -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # VDMPosts 2 | This project provides a tool and a REST api allowing to do the following : 3 | * Get the 200 last posts of the VDM website (http://www.viedemerde.fr/) and store them to a Mongo database. 4 | * Manipulate the posts through a REST api. 5 | 6 | # Environment Required 7 | * Nodejs 8 | * Local mongodb running 9 | 10 | # Install 11 | 12 | ### Install grunt cli 13 | ```sh 14 | $ npm install grunt-cli -g 15 | ``` 16 | 17 | ### Install mocha cli 18 | ```sh 19 | $ npm install mocha -g 20 | ``` 21 | 22 | ### Install dependencies 23 | Go to the root of the project and use the following command line : 24 | ```sh 25 | $ npm install 26 | ``` 27 | 28 | # Config 29 | 30 | Database settings are available in ``/config/config.js`` 31 | 32 | # Features 33 | 34 | ### Run the REST api 35 | ```sh 36 | $ grunt serve 37 | ``` 38 | This will run the REST api. 39 | 40 | ### Scan VDM last posts 41 | /!\ The REST api must be running to use this command. /!\ 42 | ```sh 43 | $ grunt get-posts 44 | ``` 45 | This will retrieve the 200 last posts from http://www.viedemerde.fr/ and store them to the database. 46 | 47 | ### Remove all posts 48 | /!\ The REST api must be running to use this command. /!\ 49 | ```sh 50 | $ grunt delete-posts 51 | ``` 52 | This will delete all the posts. 53 | 54 | # Use the REST api 55 | The posts can be access on localhost with the default port 3000. You can modify the port by modifying the environment variable 'PORT'. 56 | The route to get posts is the following : 57 | ``/api/posts`` 58 | 59 | Output : 60 | 61 | ``` 62 | { 63 | "posts":[ 64 | { 65 | "_id":1, 66 | "content":"This is the content.", 67 | "date":"2014-01-0100:00:00", 68 | "author":"Genius" 69 | } 70 | ], 71 | "count":1 72 | } 73 | ``` 74 | 75 | Several parameters are available : 76 | * ``from=YYYY-MM-DD`` allow to get posts from this date 77 | * ``to=YYYY-MM-DD`` allow to get posts until this date 78 | * ``author=XXXXX`` allow to get posts from this author 79 | 80 | You can access one post with the following route : 81 | ``/api/posts/`` 82 | 83 | Output : 84 | 85 | ``` 86 | { 87 | "post": { 88 | "id":1, 89 | "content":"This is the content.", 90 | "date":"2014-01-0100:00:00", 91 | "author":"Genius" 92 | } 93 | } 94 | ``` 95 | 96 | # Tests 97 | 98 | /!\ The REST api must be running to launch unit tests /!\ 99 | 100 | Unit tests can be run with the following command : 101 | 102 | ```sh 103 | $ grunt test 104 | ``` 105 | 106 | /!\ Running 'grunt test' will clear the database BEFORE and AFTER the tests /!\ 107 | 108 | If you want to run a test one at a time, you can use the following command : 109 | 110 | ```sh 111 | $ mocha 112 | ``` 113 | # App architecture 114 | 115 | The application is divided in 5 parts : 116 | 117 | * [api] 118 | 119 | - The controllers folder contains js files that will build the CRUD requests. 120 | - The models folder contains js files that will define a mongodb document structure. 121 | - The routes folder contains js files that will defines the routes of the REST api. 122 | 123 | * [tests] 124 | 125 | This folder contains js files needed to run unit tests. 126 | 127 | * [vdm-data] 128 | 129 | This folder contains js files needed to extract and parse the data from the VDM website. 130 | 131 | * [scripts] 132 | 133 | This folder contains scripts that allows to delete all posts from the database and launch the data transfer between VDM website and the database. 134 | 135 | * [config] 136 | 137 | This folder contains all configurations like the database uri, express settings ... 138 | --------------------------------------------------------------------------------