├── .eslintignore ├── public ├── font │ ├── icons.eot │ ├── icons.ttf │ ├── icons.woff │ ├── icons.woff2 │ └── icons.svg ├── search.js ├── icons.css └── styles.css ├── config ├── index.js └── config.development.js ├── .editorconfig ├── CHANGELOG.md ├── views ├── error.pug ├── profile.pug ├── navigation.pug ├── result.pug ├── layout.pug ├── login.pug ├── help.pug └── search.pug ├── .eslintrc ├── .gitignore ├── lib ├── checkLogin.js ├── checkToken.js ├── queryElastic.js └── findUser.js ├── package.json ├── LICENSE ├── bin └── www ├── routes └── index.js ├── app.js └── README.md /.eslintignore: -------------------------------------------------------------------------------- 1 | dist/ 2 | -------------------------------------------------------------------------------- /public/font/icons.eot: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/br-data/elasticsearch-frontend/HEAD/public/font/icons.eot -------------------------------------------------------------------------------- /public/font/icons.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/br-data/elasticsearch-frontend/HEAD/public/font/icons.ttf -------------------------------------------------------------------------------- /public/font/icons.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/br-data/elasticsearch-frontend/HEAD/public/font/icons.woff -------------------------------------------------------------------------------- /public/font/icons.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/br-data/elasticsearch-frontend/HEAD/public/font/icons.woff2 -------------------------------------------------------------------------------- /config/index.js: -------------------------------------------------------------------------------- 1 | const env = process.env.NODE_ENV || 'development'; 2 | const config = require(`./config.${env}`); 3 | 4 | module.exports = config; 5 | -------------------------------------------------------------------------------- /.editorconfig: -------------------------------------------------------------------------------- 1 | [*] 2 | indent_style = space 3 | indent_size = 2 4 | end_of_line = lf 5 | charset = utf-8 6 | trim_trailing_whitespace = true 7 | insert_final_newline = true 8 | 9 | [*.md] 10 | trim_trailing_whitespace = false 11 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | ### 1.1 4 | - Add simple REST API for search 5 | - Add authentication for API endpoints 6 | - Add user profile page 7 | 8 | ### 1.0 9 | - Add basic search interface 10 | - Add login and session-based authentication 11 | - Add help page and setup instructions 12 | - Add simple user management 13 | - Add configuration 14 | -------------------------------------------------------------------------------- /views/error.pug: -------------------------------------------------------------------------------- 1 | extends layout 2 | 3 | block content 4 | 5 | include navigation 6 | 7 | section.error.white 8 | 9 | .center 10 | 11 | h1=title 12 | 13 | p Sorry for the inconvenience. Return to the homepage. 14 | 15 | if error 16 | p.fail 17 | strong=error.message 18 | 19 | pre=message.stack 20 | 21 | -------------------------------------------------------------------------------- /views/profile.pug: -------------------------------------------------------------------------------- 1 | extends layout 2 | 3 | block content 4 | 5 | include navigation 6 | 7 | section.profile.white 8 | 9 | .center 10 | 11 | h1 User profile 12 | 13 | p Hello #{user.screenname} 14 | 15 | p Your API token: 16 | pre=user.apiToken 17 | 18 | if error 19 | p.fail 20 | strong=error.message 21 | 22 | pre=message.stack 23 | -------------------------------------------------------------------------------- /public/search.js: -------------------------------------------------------------------------------- 1 | document.addEventListener('DOMContentLoaded', changeIcon, false); 2 | 3 | function changeIcon() { 4 | var $loadingIcon = document.querySelector('.loading'); 5 | var $submitButton = document.querySelector('.submit'); 6 | 7 | if ($loadingIcon) { 8 | $submitButton.addEventListener('click', function () { 9 | $loadingIcon.className = 'icon-spinner'; 10 | }, false); 11 | } 12 | } 13 | -------------------------------------------------------------------------------- /.eslintrc: -------------------------------------------------------------------------------- 1 | { 2 | extends: 'eslint:recommended', 3 | parser: 'babel-eslint', 4 | 'rules': { 5 | 'indent': [2, 2, { 'SwitchCase': 1 }], 6 | 'quotes': [2, 'single'], 7 | 'linebreak-style': [2, 'unix'], 8 | 'semi': [2, 'always'], 9 | 'comma-dangle': [1, 'never'], 10 | 'no-trailing-spaces': [2, { 'skipBlankLines': true }], 11 | 'no-console': 0 12 | }, 13 | 'env': { 14 | 'node': true 15 | } 16 | } 17 | -------------------------------------------------------------------------------- /views/navigation.pug: -------------------------------------------------------------------------------- 1 | nav.navigation 2 | 3 | span 4 | a(href='/') 5 | i(class='icon-search') 6 | | Search 7 | span 8 | a(href='#') 9 | i(class='icon-upload-cloud') 10 | | Import 11 | span 12 | a(href='/help') 13 | i(class='icon-help') 14 | | Help 15 | 16 | if user 17 | span.logout 18 | a(href='/logout') 19 | i(class='icon-logout') 20 | | Logout 21 | span.profile 22 | a(href='/profile') 23 | i(class='icon-user') 24 | | Profile 25 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | ### Project ### 2 | pdf/ 3 | text/ 4 | contratos-text/ 5 | contratos-pdf/ 6 | 7 | ### Node ### 8 | node_modules 9 | logs 10 | *.log 11 | pids 12 | *.pid 13 | *.seed 14 | .grunt 15 | .tmp 16 | build/Release 17 | .lock-wscript 18 | 19 | ### Windows ### 20 | Thumbs.db 21 | ehthumbs.db 22 | Desktop.ini 23 | $RECYCLE.BIN/ 24 | 25 | ### OSX ### 26 | .DS_Store 27 | .AppleDouble 28 | .LSOverride 29 | Icon 30 | ._* 31 | .Spotlight-V100 32 | .Trashes 33 | 34 | ### Project Files ### 35 | .idea/ 36 | .idea_modules/ 37 | atlassian-ide-plugin.xml 38 | com_crashlytics_export_strings.xml 39 | *.sublime-workspace 40 | -------------------------------------------------------------------------------- /lib/checkLogin.js: -------------------------------------------------------------------------------- 1 | // Checks if a user is authenticated 2 | // @TODO Preserve query string on redirect 3 | function checkLogin(options) { 4 | options = options || {}; 5 | 6 | const url = options.redirectTo || '/login'; 7 | const setReturnTo = (options.setReturnTo === undefined) ? true : options.setReturnTo; 8 | 9 | return (req, res, next) => { 10 | if (!req.isAuthenticated || !req.isAuthenticated()) { 11 | if (setReturnTo && req.session) { 12 | req.session.returnTo = req.originalUrl || req.url; 13 | } 14 | res.redirect(url); 15 | } 16 | next(); 17 | }; 18 | } 19 | 20 | module.exports = checkLogin; 21 | -------------------------------------------------------------------------------- /views/result.pug: -------------------------------------------------------------------------------- 1 | extends search 2 | 3 | block result 4 | 5 | if error 6 | p.fail 7 | strong=error.message 8 | 9 | else 10 | .count 11 | p Found #{result.hits.total} documents (#{result.took} ms) 12 | 13 | each doc in result.hits.hits 14 | .document 15 | .download 16 | a.pdf(href=doc._source.file target='blank') 17 | i(class='icon-file-pdf') 18 | span PDF 19 | a.text(href=doc._source.file target='blank') 20 | i(class='icon-doc-text') 21 | span Text 22 | .header.doc 23 | span.title #{doc._source.name} 24 | each hit in doc.highlight.body 25 | p.hit!=hit 26 | -------------------------------------------------------------------------------- /lib/checkToken.js: -------------------------------------------------------------------------------- 1 | // Checks if a token is valid 2 | const passport = require('passport'); 3 | 4 | function checkToken() { 5 | return (req, res, next) => { 6 | return passport.authenticate('bearer', { 7 | session: false 8 | }, (error, user) => { 9 | if (!user) { 10 | if (error) { 11 | res.status(401); 12 | res.json({ error: error.message }); 13 | res.end(); 14 | } else { 15 | res.status(401); 16 | res.json({ error: 'Please provide an API token' }); 17 | res.end(); 18 | } 19 | } else { 20 | next(); 21 | } 22 | })(req, res, next); 23 | }; 24 | } 25 | 26 | module.exports = checkToken; 27 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "elasticsearch-frontend", 3 | "description": "Search and explore document leaks with Elasticsearch", 4 | "author": "Steffen Kühne", 5 | "version": "1.1.0", 6 | "license": "MIT", 7 | "private": true, 8 | "repository": { 9 | "type": "git", 10 | "url": "git@github.com:br-data/elasticsearch-frontend.git" 11 | }, 12 | "scripts": { 13 | "start": "node ./bin/www" 14 | }, 15 | "dependencies": { 16 | "bcrypt": "^5.0.0", 17 | "body-parser": "^1.19.0", 18 | "connect-ensure-login": "^0.1.1", 19 | "cookie-parser": "^1.4.4", 20 | "debug": "^3.2.6", 21 | "elasticsearch": "^15.5.0", 22 | "express": "^4.17.1", 23 | "express-session": "^1.16.2", 24 | "node-dir": "^0.1.17", 25 | "passport": "^0.4.0", 26 | "passport-http-bearer": "^1.0.1", 27 | "passport-local": "^1.0.0", 28 | "pug": "^3.0.1", 29 | "req-flash": "0.0.3" 30 | }, 31 | "devDependencies": { 32 | "eslint-config-esnext": "^4.0.0" 33 | } 34 | } 35 | -------------------------------------------------------------------------------- /views/layout.pug: -------------------------------------------------------------------------------- 1 | doctype html 2 | 3 | html 4 | 5 | head 6 | meta(charset='utf-8') 7 | title=page.title 8 | link(href='' rel='icon' type='image/x-icon') 9 | 10 | link(rel='stylesheet' href='/icons.css') 11 | link(rel='stylesheet' href='/styles.css') 12 | 13 | body 14 | block content 15 | -------------------------------------------------------------------------------- /views/login.pug: -------------------------------------------------------------------------------- 1 | extends layout 2 | 3 | block content 4 | 5 | include navigation 6 | 7 | section.login.white 8 | 9 | .center 10 | 11 | h1.title=page.title 12 | p.description=page.description 13 | 14 | if error.message 15 | p.fail 16 | strong=error.message 17 | else 18 | p 19 | strong Please log in to continue: 20 | 21 | form(action='/login' method='post') 22 | 23 | fieldset 24 | span.right 25 | i(class='icon-user') 26 | span.left 27 | input( 28 | type='text' 29 | name='username' 30 | placeholder='Username' 31 | ) 32 | 33 | fieldset 34 | span.right 35 | i(class='icon-lock-open-alt') 36 | span.left 37 | input( 38 | type='password' 39 | name='password' 40 | placeholder='Password' 41 | ) 42 | 43 | fieldset 44 | button.submit(type='submit' value='Login') Login 45 | i.icon-login 46 | -------------------------------------------------------------------------------- /lib/queryElastic.js: -------------------------------------------------------------------------------- 1 | // Queries Elasticsearch for data using different methods 2 | const elastic = require('elasticsearch'); 3 | const config = require('../config'); 4 | 5 | const elasticClient = new elastic.Client({ host: config.database.host }); 6 | 7 | function queryElastic() { 8 | return (req, res, next) => { 9 | if (req.query.query && req.query.type) { 10 | elasticClient.search(buildQuery(req), (error, data) => { 11 | req.error = error; 12 | req.result = data; 13 | next(); 14 | }); 15 | } else { 16 | req.error = new Error('Query string and query type are undefined'); 17 | req.result = null; 18 | next(); 19 | } 20 | }; 21 | } 22 | 23 | function buildQuery(req) { 24 | let query = config.queries[req.query.type]; 25 | 26 | query.setQuery(req.query.query); 27 | query = query.query; 28 | 29 | return { 30 | index: config.database.index, 31 | size: 500, 32 | body: { 33 | query: query, 34 | _source: config._source, 35 | highlight: config.highlight 36 | } 37 | }; 38 | } 39 | 40 | module.exports = queryElastic; 41 | -------------------------------------------------------------------------------- /lib/findUser.js: -------------------------------------------------------------------------------- 1 | function findByUsername(username, database, callback) { 2 | process.nextTick(() => { 3 | const user = database.filter(user => user.username === username); 4 | 5 | if (user && user.length > 0) { 6 | callback(null, user[0]); 7 | } else { 8 | callback(new Error('Invalid username')); 9 | } 10 | }); 11 | } 12 | 13 | function findById (id, database, callback) { 14 | process.nextTick(() => { 15 | const user = database.filter(user => user.id === id); 16 | 17 | if (user && user.length > 0) { 18 | callback(null, user[0]); 19 | } else { 20 | callback(new Error('Invalid user ID')); 21 | } 22 | }); 23 | } 24 | 25 | function findByToken(token, database, callback) { 26 | process.nextTick(() => { 27 | const user = database.filter(user => user.apiToken === token); 28 | 29 | if (user && user.length > 0) { 30 | callback(null, user[0]); 31 | } else { 32 | callback(new Error('Invalid API token')); 33 | } 34 | }); 35 | } 36 | 37 | module.exports = { 38 | byUsername: findByUsername, 39 | byId: findById, 40 | byToken: findByToken 41 | }; 42 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2017 Bayerischer Rundfunk 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /bin/www: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env node 2 | 3 | const debug = require('debug')('app'); 4 | const app = require('../app.js'); 5 | const port = process.env.PORT || 3000; 6 | 7 | const server = app.listen(process.env.PORT || 3000, () => { 8 | debug(`Express server listening on port ${server.address().port}`); 9 | }); 10 | 11 | function onError(err) { 12 | if (err.syscall !== 'listen') { 13 | throw err; 14 | } 15 | 16 | const bind = typeof port === 'string' ? `Pipe ${port}` : `Port ${port}`; 17 | 18 | // Handle specific listen errors with friendly messages 19 | switch (err.code) { 20 | case 'EACCES': 21 | console.error(`${bind} requires elevated privileges`); 22 | process.exit(1); 23 | break; 24 | case 'EADDRINUSE': 25 | console.error(`${bind} is already in use`); 26 | process.exit(1); 27 | break; 28 | default: 29 | throw err; 30 | } 31 | } 32 | 33 | function onListening() { 34 | const addr = server.address(); 35 | const bind = typeof addr === 'string' ? `Pipe ${addr}` : `Port ${addr.port}`; 36 | debug(`Listening on ${bind}`); 37 | } 38 | 39 | server.on('error', onError); 40 | server.on('listening', onListening); 41 | -------------------------------------------------------------------------------- /views/help.pug: -------------------------------------------------------------------------------- 1 | extends layout 2 | 3 | block content 4 | 5 | include navigation 6 | 7 | section.help.white 8 | 9 | .center 10 | 11 | h1 About the search 12 | p There are four different ways to search for whole sentences (full-text) or a single words (term): 13 | 14 | h3 Standard search 15 | p Full-text search. Finds exact word combinations like John Doe. Diacritcs are ignored and a search for John Doe will also find Jóhñ Döé. 16 | 17 | h3 Custom search 18 | p Full-text search. By default, the custom search finds all documents that contain John AND Doe. Supports wildcards and simple search operators: 19 | ul 20 | li + signifies AND operation 21 | li | signifies OR operation 22 | li - negates a single token 23 | li " wraps a number of tokens to signify a phrase for searching 24 | li * at the end of a term signifies a prefix query 25 | li ~N after a word signifies edit distance (fuzziness) 26 | li ~N after a phrase signifies slop amount 27 | 28 | h3 Fuzzy search 29 | p Term-based search. Finds words, even if they contain a typo or OCR mistake. A search for Jhon or J°hn will also find John. 30 | 31 | h3 Regex search 32 | p Term-based search for experts. Uses Regex patterns like J.h* for searching. This Regex will find words such as John, Jahn and Johnson. 33 | -------------------------------------------------------------------------------- /config/config.development.js: -------------------------------------------------------------------------------- 1 | const config = {}; 2 | 3 | config.page = { 4 | title: 'Document Search', 5 | description: 'Search Elasticsearch documents for persons, companies and addresses.' 6 | }; 7 | 8 | config.users = [ 9 | { 10 | id: 1, 11 | username: 'user', 12 | screenname: 'Demo User', 13 | password: '$2a$10$vP0qJyEd0hvvpG5MAaHg9ObUJJpJj9HxINZ/Yqz5nPo5Ms2nhR4r.', 14 | apiToken: '0b414d8433124406be6500833f1672e5' 15 | } 16 | ]; 17 | 18 | config.database = { 19 | type: 'elasticsearch', 20 | host: 'localhost:9200', 21 | index: 'joram' 22 | }; 23 | 24 | config.queries = { 25 | match: { 26 | name: 'Standard Search', 27 | query: { 28 | multi_match: { 29 | query: undefined, 30 | fields: ['body', 'body.folded'], 31 | type: 'phrase' 32 | } 33 | }, 34 | setQuery: function (query) { 35 | this.query.multi_match.query = query; 36 | } 37 | }, 38 | custom: { 39 | name: 'Custom Search', 40 | query: { 41 | simple_query_string: { 42 | query: undefined, 43 | fields: ['body','body.folded'], 44 | default_operator: 'and', 45 | analyze_wildcard: true 46 | } 47 | }, 48 | setQuery: function (query) { 49 | this.query.simple_query_string.query = query; 50 | } 51 | }, 52 | fuzzy: { 53 | name: 'Fuzzy Search', 54 | query: { 55 | fuzzy: { 56 | body: undefined 57 | } 58 | }, 59 | setQuery: function (query) { 60 | this.query.fuzzy.body = query; 61 | } 62 | }, 63 | regexp: { 64 | name: 'Regex Search', 65 | query: { 66 | regexp: { 67 | body: undefined 68 | } 69 | }, 70 | setQuery: function (query) { 71 | this.query.regexp.body = query; 72 | } 73 | } 74 | }; 75 | 76 | config._source = { 77 | excludes: ['body*'] 78 | }; 79 | 80 | config.highlight = { 81 | fields: { 82 | body: {} 83 | //'body.folded': {} 84 | } 85 | }; 86 | 87 | module.exports = config; 88 | -------------------------------------------------------------------------------- /views/search.pug: -------------------------------------------------------------------------------- 1 | extends layout 2 | 3 | block content 4 | 5 | include navigation 6 | 7 | script(src='/search.js') 8 | 9 | header.search.white 10 | 11 | .center 12 | 13 | h1.title=page.title 14 | p.description=page.description 15 | 16 | form(action='search') 17 | 18 | fieldset.main 19 | span.right 20 | button.submit(type='submit') Search 21 | i.icon-search.loading 22 | span.left 23 | input( 24 | type='search' 25 | name='query' 26 | placeholder='Start searching' 27 | value=(query.query ? query.query : '') 28 | ) 29 | 30 | fieldset.options.type 31 | strong Type: 32 | div 33 | input( 34 | type='radio' 35 | name='type' 36 | value='match' 37 | checked=(query.type === 'match' ? 'checked' : '') 38 | ) 39 | label Standard 40 | div 41 | input( 42 | type='radio' 43 | name='type' 44 | value='custom' 45 | checked=(query.type === 'custom' ? 'checked' : false) 46 | ) 47 | label Custom 48 | div 49 | input( 50 | type='radio' 51 | name='type' 52 | value='fuzzy' 53 | checked=(query.type === 'fuzzy' ? 'checked' : false) 54 | ) 55 | label Fuzzy 56 | div 57 | input( 58 | type='radio' 59 | name='type' 60 | value='regexp' 61 | checked=(query.type === 'regexp' ? 'checked' : false) 62 | ) 63 | label Regex 64 | 65 | fieldset.options.sort 66 | strong Sort: 67 | div 68 | input( 69 | type='radio' 70 | name='sorting' 71 | value='date' 72 | checked=(query.sorting === 'date' ? 'checked' : '') 73 | ) 74 | label Date 75 | div 76 | input( 77 | type='radio' 78 | name='sorting' 79 | value='relevance' 80 | checked=(query.sorting === 'relevance' ? 'checked' : false) 81 | ) 82 | label Relevance 83 | 84 | section.result.center 85 | block result 86 | -------------------------------------------------------------------------------- /public/icons.css: -------------------------------------------------------------------------------- 1 | @font-face { 2 | font-family: 'icons'; 3 | src: url('./font/icons.eot?43890422'); 4 | src: url('./font/icons.eot?43890422#iefix') format('embedded-opentype'), 5 | url('./font/icons.woff2?43890422') format('woff2'), 6 | url('./font/icons.woff?43890422') format('woff'), 7 | url('./font/icons.ttf?43890422') format('truetype'), 8 | url('./font/icons.svg?43890422#icons') format('svg'); 9 | font-weight: normal; 10 | font-style: normal; 11 | } 12 | 13 | [class^="icon-"]:before, 14 | [class*=" icon-"]:before { 15 | font-family: "icons"; 16 | font-style: normal; 17 | font-weight: normal; 18 | speak: none; 19 | display: inline-block; 20 | text-decoration: inherit; 21 | width: 1em; 22 | margin-right: .2em; 23 | text-align: center; 24 | font-variant: normal; 25 | text-transform: none; 26 | line-height: 1em; 27 | margin-left: .2em; 28 | -webkit-font-smoothing: antialiased; 29 | -moz-osx-font-smoothing: grayscale; 30 | } 31 | 32 | .icon-logout:before { content: '\e800'; } /* '' */ 33 | .icon-login:before { content: '\e801'; } /* '' */ 34 | .icon-search:before { content: '\e802'; } /* '' */ 35 | .icon-user:before { content: '\e804'; } /* '' */ 36 | .icon-lock:before { content: '\e803'; } /* '' */ 37 | .icon-github-circled:before { content: '\f09b'; } /* '' */ 38 | .icon-upload-cloud:before { content: '\f0ee'; } /* '' */ 39 | .icon-doc-text:before { content: '\f0f6'; } /* '' */ 40 | .icon-spinner:before { content: '\f110'; } /* '' */ 41 | .icon-help:before { content: '\f128'; } /* '' */ 42 | .icon-lock-open-alt:before { content: '\f13e'; } /* '' */ 43 | .icon-doc-inv:before { content: '\f15b'; } /* '' */ 44 | .icon-doc-text-inv:before { content: '\f15c'; } /* '' */ 45 | .icon-sort-alt-down:before { content: '\f161'; } /* '' */ 46 | .icon-file-pdf:before { content: '\f1c1'; } /* '' */ 47 | 48 | .icon-spinner { 49 | display: inline-block; 50 | webkit-animation: spin 1s infinite steps(8); 51 | animation: spin 1s infinite steps(8); 52 | } 53 | 54 | @-webkit-keyframes spin { 55 | 0% { 56 | -webkit-transform: rotate(0deg); 57 | transform: rotate(0deg); 58 | } 59 | 100% { 60 | -webkit-transform: rotate(359deg); 61 | transform: rotate(359deg); 62 | } 63 | } 64 | 65 | @keyframes spin { 66 | 0% { 67 | -webkit-transform: rotate(0deg); 68 | transform: rotate(0deg); 69 | } 70 | 100% { 71 | -webkit-transform: rotate(359deg); 72 | transform: rotate(359deg); 73 | } 74 | } 75 | -------------------------------------------------------------------------------- /routes/index.js: -------------------------------------------------------------------------------- 1 | const express = require('express'); 2 | const router = express.Router(); 3 | const passport = require('passport'); 4 | 5 | const checkLogin = require('../lib/checkLogin'); 6 | const checkToken = require('../lib/checkToken'); 7 | const queryElastic = require('../lib/queryElastic'); 8 | 9 | // Define routes 10 | router.get('/', 11 | checkLogin({ redirectTo: 'login' }), 12 | (req, res) => { 13 | res.render('search', { 14 | error: req.error, 15 | result: req.result, 16 | query: req.query, 17 | user: req.user 18 | }); 19 | } 20 | ); 21 | 22 | router.get('/login', 23 | (req, res) => { 24 | res.render('login', { 25 | error: { message: req.flash('error') } 26 | }); 27 | } 28 | ); 29 | 30 | router.post('/login', 31 | passport.authenticate('local', { 32 | failureRedirect: '/login', 33 | failureFlash: true 34 | }), 35 | (req, res) => { 36 | res.redirect('/'); 37 | } 38 | ); 39 | 40 | router.get('/logout', 41 | checkLogin({ redirectTo: 'login' }), 42 | (req, res) => { 43 | req.logout(); 44 | res.redirect('/'); 45 | } 46 | ); 47 | 48 | router.get('/profile', 49 | checkLogin({ redirectTo: 'login' }), 50 | (req, res) => { 51 | res.render('profile', { 52 | error: req.error, 53 | result: req.result, 54 | query: req.query, 55 | user: req.user 56 | }); 57 | } 58 | ); 59 | 60 | router.get('/search', 61 | checkLogin({ redirectTo: 'login' }), 62 | queryElastic(), 63 | (req, res) => { 64 | res.render('result', { 65 | error: req.error, 66 | result: req.result, 67 | query: req.query, 68 | user: req.user 69 | }); 70 | } 71 | ); 72 | 73 | router.get('/help', 74 | checkLogin({ redirectTo: 'login' }), 75 | (req, res) => { 76 | res.render('help', { 77 | error: req.error, 78 | result: req.result, 79 | query: req.query, 80 | user: req.user 81 | }); 82 | } 83 | ); 84 | 85 | router.get('/api', 86 | passport.authenticate('bearer', { 87 | session: false 88 | }), 89 | (req, res) => { 90 | res.json({ 91 | 'message': 'API is up and running', 92 | 'username': req.user.username 93 | }); 94 | } 95 | ); 96 | 97 | router.get('/api/search', 98 | checkToken(), 99 | queryElastic(), 100 | (req, res) => { 101 | res.json({ 102 | error: req.error, 103 | result: req.result 104 | }); 105 | } 106 | ); 107 | 108 | module.exports = router; 109 | -------------------------------------------------------------------------------- /app.js: -------------------------------------------------------------------------------- 1 | const path = require('path'); 2 | 3 | const express = require('express'); 4 | const session = require('express-session'); 5 | const app = express(); 6 | 7 | const bcrypt = require('bcrypt'); 8 | 9 | const passport = require('passport'); 10 | const LocalStrategy = require('passport-local').Strategy; 11 | const BearerStrategy = require('passport-http-bearer').Strategy; 12 | 13 | const flash = require('req-flash'); 14 | const cookieParser = require('cookie-parser'); 15 | const bodyParser = require('body-parser'); 16 | 17 | const routes = require('./routes'); 18 | const findUser = require('./lib/findUser'); 19 | 20 | const config = require('./config'); 21 | 22 | // Copy page config to global 23 | app.locals.page = config.page; 24 | 25 | // Set the authentication strategy for the web interface 26 | passport.use(new LocalStrategy( 27 | (username, password, callback) => { 28 | findUser.byUsername(username, config.users, (error, user) => { 29 | if (error) { return callback(null, false, error); } 30 | if (user) { 31 | // Check if the passwort matches the salted hash 32 | bcrypt.compare(password, user.password, (passwordError, isValid) => { 33 | if (passwordError) { return callback(passwordError); } 34 | if (isValid) { return callback(null, user); } 35 | return callback(null, false, new Error('Wrong password')); 36 | }); 37 | } 38 | }); 39 | } 40 | )); 41 | 42 | // Set the authentication strategy for API endpoints 43 | passport.use(new BearerStrategy( 44 | (token, callback) => { 45 | findUser.byToken(token, config.users, (error, user) => { 46 | if (error) { return callback(error, false, error); } 47 | if (user) { return callback(null, user, { scope: 'all' }); } 48 | }); 49 | } 50 | )); 51 | 52 | passport.serializeUser((user, callback) => { 53 | callback(null, user.id); 54 | }); 55 | 56 | passport.deserializeUser((id, callback) => { 57 | findUser.byId(id, config.users, (error, user) => { 58 | if (error) { return callback(error); } 59 | callback(null, user); 60 | }); 61 | }); 62 | 63 | // Configure view engine to render pug templates. 64 | app.set('views', path.join(__dirname, 'views')); 65 | app.set('view engine', 'pug'); 66 | 67 | // Use application-level middleware for common functionality, including 68 | // logging, parsing, and session handling. 69 | app.use(cookieParser()); 70 | app.use(bodyParser.urlencoded({ extended: true })); 71 | app.use(session({ 72 | secret: 'keyboard cat', 73 | resave: false, 74 | saveUninitialized: false 75 | })); 76 | app.use(flash()); 77 | app.use(express.static(path.join(__dirname, 'public'))); 78 | 79 | // Initialize Passport and restore authentication state, if any, from the session. 80 | app.use(passport.initialize()); 81 | app.use(passport.session()); 82 | 83 | // Connect routes 84 | app.use('/', routes); 85 | 86 | // Handle 404 errors 87 | app.use((req, res) => { 88 | res.status(404); 89 | res.render('error', { 90 | title: 'Page Not Found (404)', 91 | url: req.url 92 | }); 93 | }); 94 | 95 | // Handle 500 internal server errors 96 | app.use((error, req, res) => { 97 | res.status(500); 98 | res.render('error', { 99 | title: 'Internal Server Error (500)', 100 | error: error, 101 | url: req.url 102 | }); 103 | }); 104 | 105 | module.exports = app; 106 | -------------------------------------------------------------------------------- /public/styles.css: -------------------------------------------------------------------------------- 1 | html { 2 | box-sizing: border-box; 3 | } 4 | 5 | *, 6 | *:before, 7 | *:after { 8 | box-sizing: inherit; 9 | } 10 | 11 | body { 12 | font-family: Helvetica, Arial, sans-serif; 13 | margin: 0; 14 | background: #f0f0f4; 15 | } 16 | 17 | /* 18 | Global font style 19 | */ 20 | 21 | h1 { 22 | margin-top: 0; 23 | font-size: 1.6em; 24 | } 25 | 26 | h2 { 27 | margin-top: 0; 28 | font-size: 1.3em; 29 | } 30 | 31 | h3 { 32 | margin: 1.5em 0 0.5em; 33 | font-size: 1em; 34 | } 35 | 36 | a { 37 | color: inherit; 38 | } 39 | 40 | code, pre { 41 | color: #b22222; 42 | font-size: 1.2em; 43 | } 44 | 45 | .fail { 46 | color: #b22222; 47 | } 48 | 49 | .warning { 50 | color: #ffa500; 51 | } 52 | 53 | .description { 54 | margin-top: -.5em; 55 | } 56 | 57 | /* 58 | Global input element style 59 | */ 60 | 61 | fieldset { 62 | border: 0; 63 | padding: 0; 64 | margin: .5em 0; 65 | min-width: 0; 66 | } 67 | 68 | input, 69 | button, 70 | label { 71 | border: 0; 72 | font-size: 1em; 73 | } 74 | 75 | input { 76 | transition: border .2s ease; 77 | } 78 | 79 | input:focus, 80 | button:focus { 81 | outline: 0; 82 | } 83 | 84 | input:-webkit-autofill { 85 | -webkit-box-shadow: 0 0 0px 1000px #fff inset; 86 | } 87 | 88 | ::-webkit-input-placeholder { 89 | color: #889; 90 | } 91 | 92 | :-moz-placeholder { 93 | color: #889; 94 | opacity: 1; 95 | } 96 | 97 | ::-moz-placeholder { 98 | color: #889; 99 | opacity: 1; 100 | } 101 | 102 | :-ms-input-placeholder { 103 | color: #889; 104 | } 105 | 106 | /* 107 | Specific element style 108 | */ 109 | 110 | .left { 111 | overflow: hidden; 112 | display: block; 113 | } 114 | 115 | .left input { 116 | width: 100%; 117 | height: 2em; 118 | line-height: 2; 119 | padding: 0 .4em; 120 | border: 2px solid #ccd; 121 | border-top-left-radius: 4px; 122 | border-bottom-left-radius: 4px; 123 | border-right: 0; 124 | } 125 | 126 | .right { 127 | border-top-right-radius: 4px; 128 | border-bottom-right-radius: 4px; 129 | float: right; 130 | } 131 | 132 | /* 133 | Global page layout 134 | */ 135 | 136 | .center { 137 | width: 100%; 138 | max-width: 840px; 139 | margin: 0 auto; 140 | padding: 0 2em; 141 | } 142 | 143 | .white { 144 | background: #fff; 145 | padding: 2.5em 0 3em; 146 | margin-bottom: 3em; 147 | box-shadow: 0 1px 4px rgba(0, 0, 0, .3) 148 | } 149 | 150 | /* 151 | Login 152 | */ 153 | 154 | .login .center { 155 | max-width: 460px; 156 | } 157 | 158 | .login input:focus { 159 | border-color: #334; 160 | } 161 | 162 | .login .right { 163 | height: 2em; 164 | line-height: 2em; 165 | color: #fff; 166 | background: #334; 167 | text-align: center; 168 | width: 40px; 169 | } 170 | 171 | .login .submit { 172 | margin-top: 1em; 173 | width: 100%; 174 | height: 2em; 175 | text-align: center; 176 | color: #fff; 177 | background: #2e8b57; 178 | border-radius: 4px; 179 | cursor: pointer; 180 | transition: background .2s ease; 181 | } 182 | 183 | .login .submit:hover, 184 | .login .submit:focus { 185 | background: #21653f; 186 | } 187 | 188 | /* 189 | Navigation bar 190 | */ 191 | 192 | .navigation { 193 | font-size: .8em; 194 | color: #ccd; 195 | background: #334; 196 | padding: .5em; 197 | } 198 | 199 | .navigation a { 200 | text-decoration: none; 201 | } 202 | 203 | .navigation a:hover { 204 | color: #fff; 205 | } 206 | 207 | .navigation .logout, 208 | .navigation .profile { 209 | float: right; 210 | } 211 | 212 | .navigation span { 213 | margin: 0 .5em; 214 | } 215 | 216 | /* 217 | Search 218 | */ 219 | 220 | .search .main { 221 | margin: 1.5em 0 2em; 222 | } 223 | 224 | .search .left input:focus { 225 | border-color: #2e8b57; 226 | } 227 | 228 | .search .submit { 229 | height: 2em; 230 | width: 110px; 231 | float: right; 232 | color: #fff; 233 | background: #2e8b57; 234 | border-top-right-radius: 4px; 235 | border-bottom-right-radius: 4px; 236 | cursor: pointer; 237 | text-align: left; 238 | padding: 2px 6px 3px 14px; 239 | transition: background .2s ease; 240 | } 241 | 242 | .search .submit:hover, 243 | .search .submit:focus { 244 | background: #21653f; 245 | } 246 | 247 | .search .submit i { 248 | margin-left: 5px; 249 | } 250 | 251 | .search .options.sort { 252 | margin-top: -.25em; 253 | } 254 | 255 | .search .options > div { 256 | display: inline-block; 257 | white-space: nowrap; 258 | } 259 | 260 | .search .options strong { 261 | width: 3.5em; 262 | display: inline-block; 263 | } 264 | 265 | .search .options label { 266 | margin-right: .5em; 267 | } 268 | 269 | /* 270 | Results 271 | */ 272 | 273 | .result .document { 274 | margin: 20px 0 40px; 275 | padding: 10px; 276 | position: relative; 277 | background: #fff; 278 | box-shadow: 0 1px 4px rgba(0, 0, 0, .3); 279 | } 280 | 281 | .result .document: before, 282 | .result .document: after { 283 | content: ""; 284 | position: absolute; 285 | z-index: -1; 286 | box-shadow: 0 0 20px rgba(0, 0, 0, .8); 287 | top: 50%; 288 | bottom: 0; 289 | left: 10px; 290 | right: 10px; 291 | border-radius: 100px / 10px; 292 | } 293 | 294 | .result .header { 295 | margin-top: 5px; 296 | color: #889; 297 | } 298 | 299 | .result .download { 300 | float: right; 301 | cursor: pointer; 302 | } 303 | 304 | .result .download a { 305 | display: inline-block; 306 | font-size: .8em; 307 | padding: 4px 5px; 308 | background: #889; 309 | color: #fff; 310 | border-radius: 3px; 311 | margin-right: 3px; 312 | text-decoration: none; 313 | transition: background .2s ease; 314 | } 315 | 316 | .result .download a:hover { 317 | background: #667; 318 | } 319 | 320 | .result .hit { 321 | font-size: .9em; 322 | } 323 | 324 | .result .hit em { 325 | font-style: normal; 326 | font-weight: bold; 327 | background: #ffff00; 328 | } 329 | 330 | .result .count p:first-child { 331 | font-weight: bold; 332 | } 333 | 334 | /* 335 | Help 336 | */ 337 | 338 | .help ul { 339 | list-style: none; 340 | padding: 0; 341 | } 342 | -------------------------------------------------------------------------------- /public/font/icons.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Copyright (C) 2017 by original authors @ fontello.com 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Elasticsearch Frontend 2 | 3 | [![License](https://img.shields.io/github/license/br-data/elasticsearch-frontend.svg?style=flat-square)]() [![GitHub release](https://img.shields.io/github/release/br-data/elasticsearch-frontend.svg?style=flat-square)]() [![GitHub issues](https://img.shields.io/github/issues/br-data/elasticsearch-frontend.svg?style=flat-square)]() 4 | 5 | Simple search interface for large document collections in [Elasticsearch](https://www.elastic.co/de/products/elasticsearch). Made for the exploration and analysis of big document leaks. The application is build with [Express](https://expressjs.com/) and [Pug](https://pugjs.org/). User authentication and protected routes are provided by [Passport](http://passportjs.org/). 6 | 7 | ## History 8 | The initial prototype was build to uncover the tax haven in the free trade zone of Madeira. We used Elasticsearch to build a document search for the [Madeira Gazette](www.gov-madeira.pt/joram/). Many of those big PDF files are simple document scans which we wanted to search for persons and company names. Read the whole story: [Madeira – A Tax Haven Approved by the European Commission](http://web.br.de/madeira/english/) 9 | 10 | Why build another document search engine? – Because it super lightweight and customizable. Until we add more features. 11 | 12 | ## Requirements 13 | The application is written in JavaScript. You'll need **Node.js v6** at least, to run the application. Check out the [Node.js installation guide](https://nodejs.org/en/download/package-manager/). We use **Elasticsearch 2.4** for document storage and search. For further details, please refer to the [Elasticsearch installation guide](https://www.elastic.co/guide/en/elasticsearch/reference/2.4/_installation.html). 14 | 15 | To check if your Elasticsearch is up and running, call the REST-Interface from the command line: 16 | 17 | ``` 18 | $ curl -XGET http://localhost:9200/_cluster/health\?pretty\=1 19 | ``` 20 | 21 | If you are seeing a _Unassigned shards_ warning, you might consider setting the numbers of replicas to 0. This works fine in a development environment: 22 | 23 | ``` 24 | $ curl -XPUT 'localhost:9200/_settings' -d ' 25 | { 26 | index: { 27 | number_of_replicas : 0 28 | } 29 | }' 30 | ``` 31 | 32 | To check if your document are all in place, run a simple search query on your index: 33 | 34 | ``` 35 | $ curl -XGET 'localhost:9200/my-index/_search?q=body:my-query&pretty' 36 | ``` 37 | 38 | ## Installation 39 | Installation and configuration is straight forward, once Elasticsearch is set up. 40 | 41 | 1. Import documents to Elasticsearch: If you have never done that before, there is another repo dedicated to extracting text from PDF files and importing them to Elasticsearch: [elasticsearch-import-tools](https://github.com/br-data/elasticsearch-import-tools) 42 | 2. Edit the `config/config.development.js` file. 43 | 3. Start the server: `npm start`. 44 | 4. Go to http://localhost:3000. The default username is `user` and the password is `password`. 45 | 46 | ## Searching 47 | There are four different ways to search for whole sentences (full-text) or a single word (term): 48 | 49 | **Standard search** (full-text search): Finds exact word combinations like `John Doe`. Diacritcs are ignored and a search for `John Doe` will also find `Jóhñ Döé`. 50 | 51 | **Custom search** (full-text search): By default, the custom search finds all documents that contain `John` AND `Doe`. Supports wildcards and simple search operators: 52 | 53 | - `+` signifies AND operation 54 | - `|` signifies OR operation 55 | - `-` negates a single token 56 | - `"` wraps a number of tokens to signify a phrase for searching 57 | - `*` at the end of a term signifies a prefix query 58 | - `~N` after a word signifies edit distance (fuzziness) 59 | - `~N` after a phrase signifies slop amount 60 | 61 | **Fuzzy search** (term-based search): Finds words, even if they contain a typo or OCR mistake. A search for `Jhon` or `J°hn` will also find `John`. 62 | 63 | **Regex search** (term-based search): Uses Regex patterns like `J.h*` for searching. This Regex will find words such as `John`, `Jahn` and `Johnson`. 64 | 65 | ## Customization 66 | If you want to change the page title and description, simply update the configuration `config/config.development.js`. 67 | 68 | ``` 69 | config.page = { 70 | title: 'Document Search', 71 | description: 'Search Elasticsearch documents for persons, companies and addresses.' 72 | }; 73 | ``` 74 | 75 | ## Authentication 76 | The current authentication strategy is username and password, using [passport-local](https://github.com/jaredhanson/passport-local). Passport provides many different authentication strategies as Express middleware. If you want to change the authentication method, go ahead, check out the [Passport docs](http://passportjs.org/). 77 | 78 | For the ease of development, valid users are stored in the configuration `config/config.development.js`: 79 | 80 | ```javascript 81 | config.users = [ 82 | { 83 | id: 1, 84 | username: 'user', 85 | password: '$2a$10$vP0qJyEd0hvvpG5MAaHg9ObUJJpJj9HxINZ/Yqz5nPo5Ms2nhR4r.', 86 | displayName: 'Demo User', 87 | apiToken: '0b414d8433124406be6500833f1672e5' 88 | } 89 | ]; 90 | ``` 91 | 92 | New password hashes are created using [bcrypt](https://github.com/kelektiv/node.bcrypt.js): 93 | 94 | ```javacript 95 | const bcrypt = require('bcrypt') 96 | const saltRounds = 10 97 | const myPlaintextPassword = 'password' 98 | const salt = bcrypt.genSaltSync(saltRounds) 99 | const passwordHash = bcrypt.hashSync(myPlaintextPassword, salt) 100 | 101 | ``` 102 | 103 | Note that the list of user could easily be stored in a database like MongoDB. 104 | 105 | ## API 106 | 107 | ``` 108 | curl -H "Authorization: Bearer 0b414d8433124406be6500833f1672e5" http://127.0.0.1:3000/api 109 | curl "http://127.0.0.1:3000/api?access_token=0b414d8433124406be6500833f1672e5" 110 | 111 | curl -H "Authorization: Bearer 0b414d8433124406be6500833f1672e5" "http://localhost:3000/api/search?query=ciboule&type=match&sorting=date" 112 | ``` 113 | 114 | ## Deployment 115 | To deploy the application in a live environment, create a new configuration `config/config.production.js`. Update it with all your server information, Elasticsearch host, credentials etc. 116 | 117 | Use the new configuration by starting node with the environment variable set to `production`: 118 | 119 | ``` 120 | $ NODE_ENV=production node bin/www 121 | ``` 122 | 123 | To keep it running, use a process manager like [forever](https://github.com/foreverjs/forever) or [PM2](https://github.com/Unitech/pm2): 124 | 125 | ``` 126 | $ NODE_ENV=production forever start bin/www 127 | ``` 128 | 129 | It's advisable to use SSL/TLS encryption for all connections to the server. One way to do this, is routing your Node.js application through an Apache or Nginx proxy with HTTPS enabled. 130 | 131 | ## Debugging 132 | The app uses [debug](https://github.com/visionmedia/debug) as it's core debugging utility. To set the app into debug mode set the environment variable `debug`. 133 | 134 | ``` 135 | export DEBUG=* 136 | ``` 137 | 138 | If you are on a Winodws machine use: 139 | 140 | ``` 141 | set DEBUG=* 142 | ``` 143 | 144 | ## Planned features 145 | - Add (inline) document viewer 146 | - Add document import and ingestion 147 | - Add direct API access 148 | - Split data retrieval and rendering 149 | 150 | ## Similar projects: 151 | If you are looking for alternatives, check out: 152 | - OCCRP: [Aleph](https://github.com/alephdata/aleph), powering the [Investigative Dashboard](http://data.occrp.org) 153 | - ICIJ: [Datashare](https://datashare.icij.org/) 154 | - EIC: [Hoover](https://hoover.github.io/) 155 | - New York Times: [Stevedore](https://github.com/newsdev/stevedore) 156 | - [DocumentCloud](https://github.com/documentcloud) 157 | - [Open Semantic Search](https://www.opensemanticsearch.org) 158 | - [Overview](https://www.overviewdocs.com/) 159 | --------------------------------------------------------------------------------