├── .eslintignore
├── public
├── font
│ ├── icons.eot
│ ├── icons.ttf
│ ├── icons.woff
│ ├── icons.woff2
│ └── icons.svg
├── search.js
├── icons.css
└── styles.css
├── config
├── index.js
└── config.development.js
├── .editorconfig
├── CHANGELOG.md
├── views
├── error.pug
├── profile.pug
├── navigation.pug
├── result.pug
├── layout.pug
├── login.pug
├── help.pug
└── search.pug
├── .eslintrc
├── .gitignore
├── lib
├── checkLogin.js
├── checkToken.js
├── queryElastic.js
└── findUser.js
├── package.json
├── LICENSE
├── bin
└── www
├── routes
└── index.js
├── app.js
└── README.md
/.eslintignore:
--------------------------------------------------------------------------------
1 | dist/
2 |
--------------------------------------------------------------------------------
/public/font/icons.eot:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/br-data/elasticsearch-frontend/HEAD/public/font/icons.eot
--------------------------------------------------------------------------------
/public/font/icons.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/br-data/elasticsearch-frontend/HEAD/public/font/icons.ttf
--------------------------------------------------------------------------------
/public/font/icons.woff:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/br-data/elasticsearch-frontend/HEAD/public/font/icons.woff
--------------------------------------------------------------------------------
/public/font/icons.woff2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/br-data/elasticsearch-frontend/HEAD/public/font/icons.woff2
--------------------------------------------------------------------------------
/config/index.js:
--------------------------------------------------------------------------------
// Resolve the configuration module that belongs to the active environment.
// When NODE_ENV is unset (or empty) the development settings are used.
const environmentName = process.env.NODE_ENV ? process.env.NODE_ENV : 'development';
const configModulePath = `./config.${environmentName}`;

module.exports = require(configModulePath);
5 |
--------------------------------------------------------------------------------
/.editorconfig:
--------------------------------------------------------------------------------
1 | [*]
2 | indent_style = space
3 | indent_size = 2
4 | end_of_line = lf
5 | charset = utf-8
6 | trim_trailing_whitespace = true
7 | insert_final_newline = true
8 |
9 | [*.md]
10 | trim_trailing_whitespace = false
11 |
--------------------------------------------------------------------------------
/CHANGELOG.md:
--------------------------------------------------------------------------------
1 | # Changelog
2 |
3 | ### 1.1
4 | - Add simple REST API for search
5 | - Add authentication for API endpoints
6 | - Add user profile page
7 |
8 | ### 1.0
9 | - Add basic search interface
10 | - Add login and session-based authentication
11 | - Add help page and setup instructions
12 | - Add simple user management
13 | - Add configuration
14 |
--------------------------------------------------------------------------------
/views/error.pug:
--------------------------------------------------------------------------------
extends layout

block content

  include navigation

  section.error.white

    .center

      h1=title

      p Sorry for the inconvenience. Return to the homepage.

      //- Error details are optional: the 404 handler renders this view without `error`.
      if error
        p.fail
          strong=error.message

        //- The stack trace lives on the error object; no `message` local is passed to this view.
        pre=error.stack
20 |
21 |
--------------------------------------------------------------------------------
/views/profile.pug:
--------------------------------------------------------------------------------
extends layout

block content

  include navigation

  section.profile.white

    .center

      h1 User profile

      p Hello #{user.screenname}

      p Your API token:
      pre=user.apiToken

      //- Only shown when the route handed an error to the view.
      if error
        p.fail
          strong=error.message

        //- The stack trace lives on the error object; no `message` local is passed to this view.
        pre=error.stack
23 |
--------------------------------------------------------------------------------
/public/search.js:
--------------------------------------------------------------------------------
1 | document.addEventListener('DOMContentLoaded', changeIcon, false);
2 |
/**
 * Replaces the icon marked `.loading` with the spinner icon as soon as the
 * `.submit` button is clicked.
 *
 * Does nothing when either element is missing from the page, so the script
 * can be included on views that only contain one of them.
 */
function changeIcon() {
  var $loadingIcon = document.querySelector('.loading');
  var $submitButton = document.querySelector('.submit');

  // Both elements are required: the button to listen on, the icon to swap.
  if (!$loadingIcon || !$submitButton) {
    return;
  }

  $submitButton.addEventListener('click', function () {
    // Overwrites the whole class list on purpose: the search icon class is dropped.
    $loadingIcon.className = 'icon-spinner';
  }, false);
}
13 |
--------------------------------------------------------------------------------
/.eslintrc:
--------------------------------------------------------------------------------
1 | {
2 | extends: 'eslint:recommended',
3 | parser: 'babel-eslint',
4 | 'rules': {
5 | 'indent': [2, 2, { 'SwitchCase': 1 }],
6 | 'quotes': [2, 'single'],
7 | 'linebreak-style': [2, 'unix'],
8 | 'semi': [2, 'always'],
9 | 'comma-dangle': [1, 'never'],
10 | 'no-trailing-spaces': [2, { 'skipBlankLines': true }],
11 | 'no-console': 0
12 | },
13 | 'env': {
14 | 'node': true
15 | }
16 | }
17 |
--------------------------------------------------------------------------------
/views/navigation.pug:
--------------------------------------------------------------------------------
1 | nav.navigation
2 |
3 | span
4 | a(href='/')
5 | i(class='icon-search')
6 | | Search
7 | span
8 | a(href='#')
9 | i(class='icon-upload-cloud')
10 | | Import
11 | span
12 | a(href='/help')
13 | i(class='icon-help')
14 | | Help
15 |
16 | if user
17 | span.logout
18 | a(href='/logout')
19 | i(class='icon-logout')
20 | | Logout
21 | span.profile
22 | a(href='/profile')
23 | i(class='icon-user')
24 | | Profile
25 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | ### Project ###
2 | pdf/
3 | text/
4 | contratos-text/
5 | contratos-pdf/
6 |
7 | ### Node ###
8 | node_modules
9 | logs
10 | *.log
11 | pids
12 | *.pid
13 | *.seed
14 | .grunt
15 | .tmp
16 | build/Release
17 | .lock-wscript
18 |
19 | ### Windows ###
20 | Thumbs.db
21 | ehthumbs.db
22 | Desktop.ini
23 | $RECYCLE.BIN/
24 |
25 | ### OSX ###
26 | .DS_Store
27 | .AppleDouble
28 | .LSOverride
29 | Icon
30 | ._*
31 | .Spotlight-V100
32 | .Trashes
33 |
34 | ### Project Files ###
35 | .idea/
36 | .idea_modules/
37 | atlassian-ide-plugin.xml
38 | com_crashlytics_export_strings.xml
39 | *.sublime-workspace
40 |
--------------------------------------------------------------------------------
/lib/checkLogin.js:
--------------------------------------------------------------------------------
/**
 * Builds a middleware that only lets authenticated requests pass.
 *
 * Unauthenticated requests are redirected and the request chain stops there;
 * the requested URL is remembered in `req.session.returnTo` (when a session
 * exists) so the user can be sent back after logging in.
 *
 * @param {Object} [options]
 * @param {string} [options.redirectTo='/login'] - Where to send anonymous users.
 * @param {boolean} [options.setReturnTo=true] - Whether to store the requested URL in the session.
 * @returns {Function} Express middleware `(req, res, next)`.
 */
// @TODO Preserve query string on redirect
function checkLogin(options) {
  const settings = options || {};

  const url = settings.redirectTo || '/login';
  const setReturnTo = (settings.setReturnTo === undefined) ? true : settings.setReturnTo;

  return (req, res, next) => {
    if (!req.isAuthenticated || !req.isAuthenticated()) {
      if (setReturnTo && req.session) {
        req.session.returnTo = req.originalUrl || req.url;
      }
      // Stop here: calling next() after the redirect would run the protected
      // handler anyway and try to write to an already-sent response.
      return res.redirect(url);
    }
    return next();
  };
}
19 |
20 | module.exports = checkLogin;
21 |
--------------------------------------------------------------------------------
/views/result.pug:
--------------------------------------------------------------------------------
extends search

block result

  if error
    p.fail
      strong=error.message

  else
    .count
      p Found #{result.hits.total} documents (#{result.took} ms)

    each doc in result.hits.hits
      .document
        .download
          //- `_blank` opens a new tab; a bare `blank` would reuse one window named "blank".
          a.pdf(href=doc._source.file target='_blank' rel='noopener')
            i(class='icon-file-pdf')
            span PDF
          a.text(href=doc._source.file target='_blank' rel='noopener')
            i(class='icon-doc-text')
            span Text
        .header.doc
          span.title #{doc._source.name}
        //- A hit may come back without a `highlight.body` entry; skip the snippets then.
        if doc.highlight && doc.highlight.body
          each hit in doc.highlight.body
            //- Unescaped on purpose so the highlight markup is rendered.
            p.hit!=hit
26 |
--------------------------------------------------------------------------------
/lib/checkToken.js:
--------------------------------------------------------------------------------
1 | // Checks if a token is valid
2 | const passport = require('passport');
3 |
/**
 * Builds a middleware that guards API endpoints with the `bearer` strategy.
 *
 * When the strategy yields a user the request continues. Otherwise a 401 JSON
 * response is sent, carrying the strategy's error message if there is one and
 * a generic hint if not.
 *
 * @returns {Function} Express middleware `(req, res, next)`.
 */
function checkToken() {
  return function verifyBearerToken(req, res, next) {
    const onVerified = (error, user) => {
      if (user) {
        next();
        return;
      }

      const reason = error ? error.message : 'Please provide an API token';

      res.status(401);
      res.json({ error: reason });
      res.end();
    };

    // No session: the token has to be presented on every API request.
    const authenticate = passport.authenticate('bearer', { session: false }, onVerified);

    return authenticate(req, res, next);
  };
}
25 |
26 | module.exports = checkToken;
27 |
--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "elasticsearch-frontend",
3 | "description": "Search and explore document leaks with Elasticsearch",
4 | "author": "Steffen Kühne",
5 | "version": "1.1.0",
6 | "license": "MIT",
7 | "private": true,
8 | "repository": {
9 | "type": "git",
10 | "url": "git@github.com:br-data/elasticsearch-frontend.git"
11 | },
12 | "scripts": {
13 | "start": "node ./bin/www"
14 | },
15 | "dependencies": {
16 | "bcrypt": "^5.0.0",
17 | "body-parser": "^1.19.0",
18 | "connect-ensure-login": "^0.1.1",
19 | "cookie-parser": "^1.4.4",
20 | "debug": "^3.2.6",
21 | "elasticsearch": "^15.5.0",
22 | "express": "^4.17.1",
23 | "express-session": "^1.16.2",
24 | "node-dir": "^0.1.17",
25 | "passport": "^0.4.0",
26 | "passport-http-bearer": "^1.0.1",
27 | "passport-local": "^1.0.0",
28 | "pug": "^3.0.1",
29 | "req-flash": "0.0.3"
30 | },
31 | "devDependencies": {
32 | "eslint-config-esnext": "^4.0.0"
33 | }
34 | }
35 |
--------------------------------------------------------------------------------
/views/layout.pug:
--------------------------------------------------------------------------------
1 | doctype html
2 |
3 | html
4 |
5 | head
6 | meta(charset='utf-8')
7 | title=page.title
8 | link(href='data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABgAAAAYCAYAAADgdz34AAAB8UlEQVRIS7XVS6hNcRTH8c/1KGSCARIDUkrERMljYiAUwkwSKRMyQAYIAyLMZSAZSEkIEzOvMJA8rmSglJQJ8oyQlta97bvPPvvswbmrdvuc/r//+q7Hf/13j0G2nob+RyCez/jbcM9/WR1gJrZhOaak0594hPM4gx+dYFWA4TiGHRhS4+ANNuBOHaQMGIZLWFnY9A738QVTMR8RRNgvrMGNdpAy4Cj2pPgDtuNCqe4TcRzrUxfgOXhdBSkCpuElIouPWIgXNemfwM5cj6zXdQJEVLtStAlnOzRwKB5jVmY4GVHOAVbM4AlmI0ozHr87nRBsxanUbcS5OsB3jMRNLG3gPCQRUAQWdhj76gARcaR9FasbAqbjVWpPFkrcv71YoreYlI2NIWtiK3A9hbsRjW/bg4uFkxCpP2tAiJrHsIXFqbtXB1iFKym4hSX4UwNZgNs57TEDUa6We6pYovj9APPSaWS0GV8rIItwGeNyLZobTW6x8iRHFAEZm8o416cz0pjYGMYYqLWlizKCWIa7dSXqW5uLa9nwBm3ol1RC2l3XY3AQWzCqgtKLAznF8e6zFkinD85oLMaMBL3HQzwtOI1A2kI6AZqWqAoS1/rzbgEikDJkL450ExCQ/TiET4ij3NttQEAm5NfvW/wZDMCAvv0DZEFgGQvWNqMAAAAASUVORK5CYII=' rel='icon' type='image/x-icon')
9 |
10 | link(rel='stylesheet' href='/icons.css')
11 | link(rel='stylesheet' href='/styles.css')
12 |
13 | body
14 | block content
15 |
--------------------------------------------------------------------------------
/views/login.pug:
--------------------------------------------------------------------------------
1 | extends layout
2 |
3 | block content
4 |
5 | include navigation
6 |
7 | section.login.white
8 |
9 | .center
10 |
11 | h1.title=page.title
12 | p.description=page.description
13 |
14 | if error.message
15 | p.fail
16 | strong=error.message
17 | else
18 | p
19 | strong Please log in to continue:
20 |
21 | form(action='/login' method='post')
22 |
23 | fieldset
24 | span.right
25 | i(class='icon-user')
26 | span.left
27 | input(
28 | type='text'
29 | name='username'
30 | placeholder='Username'
31 | )
32 |
33 | fieldset
34 | span.right
35 | i(class='icon-lock-open-alt')
36 | span.left
37 | input(
38 | type='password'
39 | name='password'
40 | placeholder='Password'
41 | )
42 |
43 | fieldset
44 | button.submit(type='submit' value='Login') Login
45 | i.icon-login
46 |
--------------------------------------------------------------------------------
/lib/queryElastic.js:
--------------------------------------------------------------------------------
1 | // Queries Elasticsearch for data using different methods
2 | const elastic = require('elasticsearch');
3 | const config = require('../config');
4 |
5 | const elasticClient = new elastic.Client({ host: config.database.host });
6 |
/**
 * Builds a middleware that runs the search described by `req.query` and
 * stores the outcome on the request as `req.error` / `req.result`.
 *
 * The middleware never responds itself; it always calls `next()` so the
 * route handler decides how to present the result or the error.
 *
 * @returns {Function} Express middleware `(req, res, next)`.
 */
function queryElastic() {
  return (req, res, next) => {
    const searchText = req.query.query;
    const type = req.query.type;

    if (!searchText || !type) {
      req.error = new Error('Query string and query type are undefined');
      req.result = null;
      next();
      return;
    }

    // Only accept types that are configured. An own-property check keeps
    // inherited keys such as "constructor" from being treated as a query.
    const isKnownType = typeof type === 'string' &&
      Object.prototype.hasOwnProperty.call(config.queries, type);

    if (!isKnownType) {
      req.error = new Error(`Unknown query type: ${type}`);
      req.result = null;
      next();
      return;
    }

    elasticClient.search(buildQuery(req), (error, data) => {
      req.error = error;
      req.result = data;
      next();
    });
  };
}
22 |
/**
 * Translates the request's `type` and `query` parameters into the search
 * parameters passed to the Elasticsearch client.
 *
 * The query templates in the configuration are shared, mutable objects, so
 * the filled-in template is copied before it is returned. A later request
 * can then no longer alter a query that was already handed out.
 *
 * @param {Object} req - Request whose `query.type` names a configured query
 *   and whose `query.query` holds the search text.
 * @returns {Object} Search parameters (`index`, `size`, `body`).
 * @throws {Error} When `req.query.type` is not a configured query type.
 */
function buildQuery(req) {
  const type = req.query.type;
  const isKnownType = typeof type === 'string' &&
    Object.prototype.hasOwnProperty.call(config.queries, type);

  if (!isKnownType) {
    throw new Error(`Unknown query type: ${type}`);
  }

  const template = config.queries[type];

  // Fill the template, snapshot it, then restore the empty placeholder so
  // the search text does not linger in the shared configuration.
  template.setQuery(req.query.query);
  const query = JSON.parse(JSON.stringify(template.query));
  template.setQuery(undefined);

  return {
    index: config.database.index,
    size: 500,
    body: {
      query: query,
      _source: config._source,
      highlight: config.highlight
    }
  };
}
39 |
40 | module.exports = queryElastic;
41 |
--------------------------------------------------------------------------------
/lib/findUser.js:
--------------------------------------------------------------------------------
/**
 * Looks up the first user record whose `username` equals the given name.
 *
 * The callback is always invoked asynchronously (on the next tick).
 *
 * @param {string} username - Name to look for.
 * @param {Array<Object>} database - User records to search.
 * @param {Function} callback - Called with `(null, user)` on a match or
 *   with an `Error('Invalid username')` when nothing matches.
 */
function findByUsername(username, database, callback) {
  process.nextTick(() => {
    const matches = database.filter((candidate) => candidate.username === username);

    if (!matches || matches.length === 0) {
      callback(new Error('Invalid username'));
      return;
    }

    callback(null, matches[0]);
  });
}
12 |
/**
 * Looks up the first user record whose `id` strictly equals the given id.
 *
 * The callback is always invoked asynchronously (on the next tick).
 *
 * @param {*} id - Identifier to look for (compared with `===`).
 * @param {Array<Object>} database - User records to search.
 * @param {Function} callback - Called with `(null, user)` on a match or
 *   with an `Error('Invalid user ID')` when nothing matches.
 */
function findById(id, database, callback) {
  process.nextTick(() => {
    const matches = database.filter((candidate) => candidate.id === id);

    if (!matches || matches.length === 0) {
      callback(new Error('Invalid user ID'));
      return;
    }

    callback(null, matches[0]);
  });
}
24 |
/**
 * Looks up the first user record whose `apiToken` equals the given token.
 *
 * The callback is always invoked asynchronously (on the next tick).
 *
 * @param {string} token - API token to look for.
 * @param {Array<Object>} database - User records to search.
 * @param {Function} callback - Called with `(null, user)` on a match or
 *   with an `Error('Invalid API token')` when nothing matches.
 */
function findByToken(token, database, callback) {
  process.nextTick(() => {
    const matches = database.filter((candidate) => candidate.apiToken === token);

    if (!matches || matches.length === 0) {
      callback(new Error('Invalid API token'));
      return;
    }

    callback(null, matches[0]);
  });
}
36 |
37 | module.exports = {
38 | byUsername: findByUsername,
39 | byId: findById,
40 | byToken: findByToken
41 | };
42 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2017 Bayerischer Rundfunk
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/bin/www:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env node
2 |
const debug = require('debug')('app');
const app = require('../app.js');

// Port (or pipe name) taken from the environment, 3000 when PORT is unset.
const port = process.env.PORT || 3000;

// Start listening; the bound port is logged once the server is up.
const server = app.listen(port, () => {
  const boundPort = server.address().port;
  debug(`Express server listening on port ${boundPort}`);
});
10 |
/**
 * Handles `error` events of the HTTP server.
 *
 * Known listen failures are reported with a readable message and end the
 * process with exit code 1. Every other error is rethrown.
 *
 * @param {Error} err - Error emitted by the server.
 * @throws {Error} The original error when it is not a handled listen error.
 */
function onError(err) {
  if (err.syscall !== 'listen') {
    throw err;
  }

  // A port read from the environment is always a string, so the type alone
  // cannot tell a pipe name from a port number; only non-numeric strings
  // are reported as pipes.
  const isNamedPipe = typeof port === 'string' && !/^\d+$/.test(port);
  const bind = isNamedPipe ? `Pipe ${port}` : `Port ${port}`;

  // Handle specific listen errors with friendly messages
  switch (err.code) {
    case 'EACCES':
      console.error(`${bind} requires elevated privileges`);
      process.exit(1);
      break;
    case 'EADDRINUSE':
      console.error(`${bind} is already in use`);
      process.exit(1);
      break;
    default:
      throw err;
  }
}
32 |
/**
 * Logs where the server is listening: the pipe name when the address is a
 * string, otherwise the port of the address object.
 */
function onListening() {
  const address = server.address();
  let target;

  if (typeof address === 'string') {
    target = `Pipe ${address}`;
  } else {
    target = `Port ${address.port}`;
  }

  debug(`Listening on ${target}`);
}
38 |
39 | server.on('error', onError);
40 | server.on('listening', onListening);
41 |
--------------------------------------------------------------------------------
/views/help.pug:
--------------------------------------------------------------------------------
extends layout

block content

  include navigation

  section.help.white

    .center

      h1 About the search
      p There are four different ways to search for whole sentences (full-text) or single words (term):

      h3 Standard search
      p Full-text search. Finds exact word combinations like John Doe. Diacritics are ignored and a search for John Doe will also find Jóhñ Döé.

      h3 Custom search
      p Full-text search. By default, the custom search finds all documents that contain John AND Doe. Supports wildcards and simple search operators:
      ul
        li + signifies AND operation
        li | signifies OR operation
        li - negates a single token
        li " wraps a number of tokens to signify a phrase for searching
        li * at the end of a term signifies a prefix query
        li ~N after a word signifies edit distance (fuzziness)
        li ~N after a phrase signifies slop amount

      h3 Fuzzy search
      p Term-based search. Finds words, even if they contain a typo or OCR mistake. A search for Jhon or J°hn will also find John.

      h3 Regex search
      p Term-based search for experts. Uses Regex patterns like J.h* for searching. This Regex will find words such as John, Jahn and Johnson.
33 |
--------------------------------------------------------------------------------
/config/config.development.js:
--------------------------------------------------------------------------------
1 | const config = {};
2 |
3 | config.page = {
4 | title: 'Document Search',
5 | description: 'Search Elasticsearch documents for persons, companies and addresses.'
6 | };
7 |
8 | config.users = [
9 | {
10 | id: 1,
11 | username: 'user',
12 | screenname: 'Demo User',
13 | password: '$2a$10$vP0qJyEd0hvvpG5MAaHg9ObUJJpJj9HxINZ/Yqz5nPo5Ms2nhR4r.',
14 | apiToken: '0b414d8433124406be6500833f1672e5'
15 | }
16 | ];
17 |
18 | config.database = {
19 | type: 'elasticsearch',
20 | host: 'localhost:9200',
21 | index: 'joram'
22 | };
23 |
24 | config.queries = {
25 | match: {
26 | name: 'Standard Search',
27 | query: {
28 | multi_match: {
29 | query: undefined,
30 | fields: ['body', 'body.folded'],
31 | type: 'phrase'
32 | }
33 | },
34 | setQuery: function (query) {
35 | this.query.multi_match.query = query;
36 | }
37 | },
38 | custom: {
39 | name: 'Custom Search',
40 | query: {
41 | simple_query_string: {
42 | query: undefined,
43 | fields: ['body','body.folded'],
44 | default_operator: 'and',
45 | analyze_wildcard: true
46 | }
47 | },
48 | setQuery: function (query) {
49 | this.query.simple_query_string.query = query;
50 | }
51 | },
52 | fuzzy: {
53 | name: 'Fuzzy Search',
54 | query: {
55 | fuzzy: {
56 | body: undefined
57 | }
58 | },
59 | setQuery: function (query) {
60 | this.query.fuzzy.body = query;
61 | }
62 | },
63 | regexp: {
64 | name: 'Regex Search',
65 | query: {
66 | regexp: {
67 | body: undefined
68 | }
69 | },
70 | setQuery: function (query) {
71 | this.query.regexp.body = query;
72 | }
73 | }
74 | };
75 |
76 | config._source = {
77 | excludes: ['body*']
78 | };
79 |
80 | config.highlight = {
81 | fields: {
82 | body: {}
83 | //'body.folded': {}
84 | }
85 | };
86 |
87 | module.exports = config;
88 |
--------------------------------------------------------------------------------
/views/search.pug:
--------------------------------------------------------------------------------
1 | extends layout
2 |
3 | block content
4 |
5 | include navigation
6 |
7 | script(src='/search.js')
8 |
9 | header.search.white
10 |
11 | .center
12 |
13 | h1.title=page.title
14 | p.description=page.description
15 |
16 | form(action='search')
17 |
18 | fieldset.main
19 | span.right
20 | button.submit(type='submit') Search
21 | i.icon-search.loading
22 | span.left
23 | input(
24 | type='search'
25 | name='query'
26 | placeholder='Start searching'
27 | value=(query.query ? query.query : '')
28 | )
29 |
30 | fieldset.options.type
31 | strong Type:
32 | div
33 | input(
34 | type='radio'
35 | name='type'
36 | value='match'
37 | checked=(query.type === 'match' ? 'checked' : '')
38 | )
39 | label Standard
40 | div
41 | input(
42 | type='radio'
43 | name='type'
44 | value='custom'
45 | checked=(query.type === 'custom' ? 'checked' : false)
46 | )
47 | label Custom
48 | div
49 | input(
50 | type='radio'
51 | name='type'
52 | value='fuzzy'
53 | checked=(query.type === 'fuzzy' ? 'checked' : false)
54 | )
55 | label Fuzzy
56 | div
57 | input(
58 | type='radio'
59 | name='type'
60 | value='regexp'
61 | checked=(query.type === 'regexp' ? 'checked' : false)
62 | )
63 | label Regex
64 |
65 | fieldset.options.sort
66 | strong Sort:
67 | div
68 | input(
69 | type='radio'
70 | name='sorting'
71 | value='date'
72 | checked=(query.sorting === 'date' ? 'checked' : '')
73 | )
74 | label Date
75 | div
76 | input(
77 | type='radio'
78 | name='sorting'
79 | value='relevance'
80 | checked=(query.sorting === 'relevance' ? 'checked' : false)
81 | )
82 | label Relevance
83 |
84 | section.result.center
85 | block result
86 |
--------------------------------------------------------------------------------
/public/icons.css:
--------------------------------------------------------------------------------
1 | @font-face {
2 | font-family: 'icons';
3 | src: url('./font/icons.eot?43890422');
4 | src: url('./font/icons.eot?43890422#iefix') format('embedded-opentype'),
5 | url('./font/icons.woff2?43890422') format('woff2'),
6 | url('./font/icons.woff?43890422') format('woff'),
7 | url('./font/icons.ttf?43890422') format('truetype'),
8 | url('./font/icons.svg?43890422#icons') format('svg');
9 | font-weight: normal;
10 | font-style: normal;
11 | }
12 |
13 | [class^="icon-"]:before,
14 | [class*=" icon-"]:before {
15 | font-family: "icons";
16 | font-style: normal;
17 | font-weight: normal;
18 | speak: none;
19 | display: inline-block;
20 | text-decoration: inherit;
21 | width: 1em;
22 | margin-right: .2em;
23 | text-align: center;
24 | font-variant: normal;
25 | text-transform: none;
26 | line-height: 1em;
27 | margin-left: .2em;
28 | -webkit-font-smoothing: antialiased;
29 | -moz-osx-font-smoothing: grayscale;
30 | }
31 |
32 | .icon-logout:before { content: '\e800'; } /* '' */
33 | .icon-login:before { content: '\e801'; } /* '' */
34 | .icon-search:before { content: '\e802'; } /* '' */
35 | .icon-user:before { content: '\e804'; } /* '' */
36 | .icon-lock:before { content: '\e803'; } /* '' */
37 | .icon-github-circled:before { content: '\f09b'; } /* '' */
38 | .icon-upload-cloud:before { content: '\f0ee'; } /* '' */
39 | .icon-doc-text:before { content: '\f0f6'; } /* '' */
40 | .icon-spinner:before { content: '\f110'; } /* '' */
41 | .icon-help:before { content: '\f128'; } /* '' */
42 | .icon-lock-open-alt:before { content: '\f13e'; } /* '' */
43 | .icon-doc-inv:before { content: '\f15b'; } /* '' */
44 | .icon-doc-text-inv:before { content: '\f15c'; } /* '' */
45 | .icon-sort-alt-down:before { content: '\f161'; } /* '' */
46 | .icon-file-pdf:before { content: '\f1c1'; } /* '' */
47 |
/* Rotating spinner icon; the prefixed declaration needs its leading dash to be recognised. */
.icon-spinner {
  display: inline-block;
  -webkit-animation: spin 1s infinite steps(8);
  animation: spin 1s infinite steps(8);
}
53 |
54 | @-webkit-keyframes spin {
55 | 0% {
56 | -webkit-transform: rotate(0deg);
57 | transform: rotate(0deg);
58 | }
59 | 100% {
60 | -webkit-transform: rotate(359deg);
61 | transform: rotate(359deg);
62 | }
63 | }
64 |
65 | @keyframes spin {
66 | 0% {
67 | -webkit-transform: rotate(0deg);
68 | transform: rotate(0deg);
69 | }
70 | 100% {
71 | -webkit-transform: rotate(359deg);
72 | transform: rotate(359deg);
73 | }
74 | }
75 |
--------------------------------------------------------------------------------
/routes/index.js:
--------------------------------------------------------------------------------
1 | const express = require('express');
2 | const router = express.Router();
3 | const passport = require('passport');
4 |
5 | const checkLogin = require('../lib/checkLogin');
6 | const checkToken = require('../lib/checkToken');
7 | const queryElastic = require('../lib/queryElastic');
8 |
9 | // Define routes
// Search start page; anonymous visitors are sent to the login form.
router.get('/', checkLogin({ redirectTo: 'login' }), (req, res) => {
  const { error, result, query, user } = req;

  res.render('search', { error, result, query, user });
});
21 |
// Login form; a failed attempt leaves its message in the "error" flash slot.
router.get('/login', (req, res) => {
  const flashedError = req.flash('error');

  res.render('login', { error: { message: flashedError } });
});
29 |
// Handle the login form. Failed attempts go back to the form with a flash
// message. Successful logins return to the page that triggered the login
// (stored by checkLogin as `session.returnTo`), or to the start page.
router.post('/login',
  passport.authenticate('local', {
    failureRedirect: '/login',
    failureFlash: true
  }),
  (req, res) => {
    const session = req.session || {};
    const target = session.returnTo || '/';

    // Consume the stored URL so it is used for one redirect only.
    delete session.returnTo;

    res.redirect(target);
  }
);
39 |
// End the current login session and go back to the start page.
const handleLogout = (req, res) => {
  req.logout();
  res.redirect('/');
};

router.get('/logout', checkLogin({ redirectTo: 'login' }), handleLogout);
47 |
// Profile page of the logged-in user.
router.get('/profile', checkLogin({ redirectTo: 'login' }), (req, res) => {
  const { error, result, query, user } = req;

  res.render('profile', { error, result, query, user });
});
59 |
// Run the search; queryElastic() leaves its outcome on req.error / req.result.
router.get('/search', checkLogin({ redirectTo: 'login' }), queryElastic(), (req, res) => {
  const { error, result, query, user } = req;

  res.render('result', { error, result, query, user });
});
72 |
// Help page describing the available search types.
router.get('/help', checkLogin({ redirectTo: 'login' }), (req, res) => {
  const { error, result, query, user } = req;

  res.render('help', { error, result, query, user });
});
84 |
// API status endpoint; authenticated per request with a bearer token, no session.
const requireBearerToken = passport.authenticate('bearer', { session: false });

router.get('/api', requireBearerToken, (req, res) => {
  const status = {
    'message': 'API is up and running',
    'username': req.user.username
  };

  res.json(status);
});
96 |
// Search endpoint of the API. queryElastic() leaves its outcome on
// req.error / req.result.
router.get('/api/search',
  checkToken(),
  queryElastic(),
  (req, res) => {
    // A plain Error has no enumerable properties and would be serialized as
    // `{}`. Send its message instead, in the same `{ error: '<text>' }`
    // shape that checkToken uses.
    const error = req.error
      ? (req.error.message || String(req.error))
      : undefined;

    res.json({
      error: error,
      result: req.result
    });
  }
);
107 |
108 | module.exports = router;
109 |
--------------------------------------------------------------------------------
/app.js:
--------------------------------------------------------------------------------
1 | const path = require('path');
2 |
3 | const express = require('express');
4 | const session = require('express-session');
5 | const app = express();
6 |
7 | const bcrypt = require('bcrypt');
8 |
9 | const passport = require('passport');
10 | const LocalStrategy = require('passport-local').Strategy;
11 | const BearerStrategy = require('passport-http-bearer').Strategy;
12 |
13 | const flash = require('req-flash');
14 | const cookieParser = require('cookie-parser');
15 | const bodyParser = require('body-parser');
16 |
17 | const routes = require('./routes');
18 | const findUser = require('./lib/findUser');
19 |
20 | const config = require('./config');
21 |
22 | // Copy page config to global
23 | app.locals.page = config.page;
24 |
// Authentication strategy for the web interface (username and password form)
const verifyCredentials = (username, password, callback) => {
  findUser.byUsername(username, config.users, (error, user) => {
    // A failed lookup is reported as a failed login, not as a server error.
    if (error) {
      return callback(null, false, error);
    }

    if (!user) {
      return;
    }

    // Check whether the password matches the salted hash
    bcrypt.compare(password, user.password, (passwordError, isValid) => {
      if (passwordError) {
        return callback(passwordError);
      }

      return isValid
        ? callback(null, user)
        : callback(null, false, new Error('Wrong password'));
    });
  });
};

passport.use(new LocalStrategy(verifyCredentials));
41 |
// Authentication strategy for the API endpoints (bearer token)
const verifyToken = (token, callback) => {
  findUser.byToken(token, config.users, (error, user) => {
    if (error) {
      return callback(error, false, error);
    }

    if (!user) {
      return;
    }

    return callback(null, user, { scope: 'all' });
  });
};

passport.use(new BearerStrategy(verifyToken));
51 |
// Only the user's id is kept in the session.
passport.serializeUser((user, callback) => {
  const { id } = user;

  callback(null, id);
});
55 |
// Turn the id stored in the session back into the configured user record.
passport.deserializeUser((id, callback) => {
  findUser.byId(id, config.users, (error, user) => {
    if (error) {
      return callback(error);
    }

    callback(null, user);
    return undefined;
  });
});
62 |
63 | // Configure view engine to render pug templates.
64 | app.set('views', path.join(__dirname, 'views'));
65 | app.set('view engine', 'pug');
66 |
// Use application-level middleware for common functionality, including
// parsing, session handling, flash messages and static files.
app.use(cookieParser());
app.use(bodyParser.urlencoded({ extended: true }));
app.use(session({
  // Session cookies are signed with this secret. Set SESSION_SECRET in any
  // real deployment; the literal is only a fallback for local development.
  secret: process.env.SESSION_SECRET || 'keyboard cat',
  resave: false,
  saveUninitialized: false
}));
app.use(flash());
app.use(express.static(path.join(__dirname, 'public')));
78 |
79 | // Initialize Passport and restore authentication state, if any, from the session.
80 | app.use(passport.initialize());
81 | app.use(passport.session());
82 |
83 | // Connect routes
84 | app.use('/', routes);
85 |
// Handle 404 errors: requests that reach this point matched no route.
app.use((req, res) => {
  const locals = {
    title: 'Page Not Found (404)',
    url: req.url
  };

  res.status(404);
  res.render('error', locals);
});
94 |
// Handle 500 internal server errors.
// Express recognises an error handler by its four declared parameters, so
// `next` has to be listed even though it is not used.
// eslint-disable-next-line no-unused-vars
app.use((error, req, res, next) => {
  res.status(500);
  res.render('error', {
    title: 'Internal Server Error (500)',
    error: error,
    url: req.url
  });
});
104 |
105 | module.exports = app;
106 |
--------------------------------------------------------------------------------
/public/styles.css:
--------------------------------------------------------------------------------
1 | html {
2 | box-sizing: border-box;
3 | }
4 |
5 | *,
6 | *:before,
7 | *:after {
8 | box-sizing: inherit;
9 | }
10 |
11 | body {
12 | font-family: Helvetica, Arial, sans-serif;
13 | margin: 0;
14 | background: #f0f0f4;
15 | }
16 |
17 | /*
18 | Global font style
19 | */
20 |
21 | h1 {
22 | margin-top: 0;
23 | font-size: 1.6em;
24 | }
25 |
26 | h2 {
27 | margin-top: 0;
28 | font-size: 1.3em;
29 | }
30 |
31 | h3 {
32 | margin: 1.5em 0 0.5em;
33 | font-size: 1em;
34 | }
35 |
36 | a {
37 | color: inherit;
38 | }
39 |
40 | code, pre {
41 | color: #b22222;
42 | font-size: 1.2em;
43 | }
44 |
45 | .fail {
46 | color: #b22222;
47 | }
48 |
49 | .warning {
50 | color: #ffa500;
51 | }
52 |
53 | .description {
54 | margin-top: -.5em;
55 | }
56 |
57 | /*
58 | Global input element style
59 | */
60 |
61 | fieldset {
62 | border: 0;
63 | padding: 0;
64 | margin: .5em 0;
65 | min-width: 0;
66 | }
67 |
68 | input,
69 | button,
70 | label {
71 | border: 0;
72 | font-size: 1em;
73 | }
74 |
75 | input {
76 | transition: border .2s ease;
77 | }
78 |
79 | input:focus,
80 | button:focus {
81 | outline: 0;
82 | }
83 |
84 | input:-webkit-autofill {
85 | -webkit-box-shadow: 0 0 0px 1000px #fff inset;
86 | }
87 |
88 | ::-webkit-input-placeholder {
89 | color: #889;
90 | }
91 |
92 | :-moz-placeholder {
93 | color: #889;
94 | opacity: 1;
95 | }
96 |
97 | ::-moz-placeholder {
98 | color: #889;
99 | opacity: 1;
100 | }
101 |
102 | :-ms-input-placeholder {
103 | color: #889;
104 | }
105 |
106 | /*
107 | Specific element style
108 | */
109 |
110 | .left {
111 | overflow: hidden;
112 | display: block;
113 | }
114 |
115 | .left input {
116 | width: 100%;
117 | height: 2em;
118 | line-height: 2;
119 | padding: 0 .4em;
120 | border: 2px solid #ccd;
121 | border-top-left-radius: 4px;
122 | border-bottom-left-radius: 4px;
123 | border-right: 0;
124 | }
125 |
126 | .right {
127 | border-top-right-radius: 4px;
128 | border-bottom-right-radius: 4px;
129 | float: right;
130 | }
131 |
132 | /*
133 | Global page layout
134 | */
135 |
136 | .center {
137 | width: 100%;
138 | max-width: 840px;
139 | margin: 0 auto;
140 | padding: 0 2em;
141 | }
142 |
143 | .white {
144 | background: #fff;
145 | padding: 2.5em 0 3em;
146 | margin-bottom: 3em;
147 | box-shadow: 0 1px 4px rgba(0, 0, 0, .3);
148 | }
149 |
150 | /*
151 | Login
152 | */
153 |
154 | .login .center {
155 | max-width: 460px;
156 | }
157 |
158 | .login input:focus {
159 | border-color: #334;
160 | }
161 |
162 | .login .right {
163 | height: 2em;
164 | line-height: 2em;
165 | color: #fff;
166 | background: #334;
167 | text-align: center;
168 | width: 40px;
169 | }
170 |
171 | .login .submit {
172 | margin-top: 1em;
173 | width: 100%;
174 | height: 2em;
175 | text-align: center;
176 | color: #fff;
177 | background: #2e8b57;
178 | border-radius: 4px;
179 | cursor: pointer;
180 | transition: background .2s ease;
181 | }
182 |
183 | .login .submit:hover,
184 | .login .submit:focus {
185 | background: #21653f;
186 | }
187 |
188 | /*
189 | Navigation bar
190 | */
191 |
192 | .navigation {
193 | font-size: .8em;
194 | color: #ccd;
195 | background: #334;
196 | padding: .5em;
197 | }
198 |
199 | .navigation a {
200 | text-decoration: none;
201 | }
202 |
203 | .navigation a:hover {
204 | color: #fff;
205 | }
206 |
207 | .navigation .logout,
208 | .navigation .profile {
209 | float: right;
210 | }
211 |
212 | .navigation span {
213 | margin: 0 .5em;
214 | }
215 |
216 | /*
217 | Search
218 | */
219 |
220 | .search .main {
221 | margin: 1.5em 0 2em;
222 | }
223 |
224 | .search .left input:focus {
225 | border-color: #2e8b57;
226 | }
227 |
228 | .search .submit {
229 | height: 2em;
230 | width: 110px;
231 | float: right;
232 | color: #fff;
233 | background: #2e8b57;
234 | border-top-right-radius: 4px;
235 | border-bottom-right-radius: 4px;
236 | cursor: pointer;
237 | text-align: left;
238 | padding: 2px 6px 3px 14px;
239 | transition: background .2s ease;
240 | }
241 |
242 | .search .submit:hover,
243 | .search .submit:focus {
244 | background: #21653f;
245 | }
246 |
247 | .search .submit i {
248 | margin-left: 5px;
249 | }
250 |
251 | .search .options.sort {
252 | margin-top: -.25em;
253 | }
254 |
255 | .search .options > div {
256 | display: inline-block;
257 | white-space: nowrap;
258 | }
259 |
260 | .search .options strong {
261 | width: 3.5em;
262 | display: inline-block;
263 | }
264 |
265 | .search .options label {
266 | margin-right: .5em;
267 | }
268 |
269 | /*
270 | Results
271 | */
272 |
273 | .result .document {
274 | margin: 20px 0 40px;
275 | padding: 10px;
276 | position: relative;
277 | background: #fff;
278 | box-shadow: 0 1px 4px rgba(0, 0, 0, .3);
279 | }
280 |
281 | .result .document:before,
282 | .result .document:after {
283 | content: ""; /* pseudo-elements are not generated without a content value */
284 | position: absolute;
285 | z-index: -1; /* paint the shadow behind the document box */
286 | box-shadow: 0 0 20px rgba(0, 0, 0, .8);
287 | top: 50%;
288 | bottom: 0;
289 | left: 10px;
290 | right: 10px;
291 | border-radius: 100px / 10px;
292 | }
293 |
294 | .result .header {
295 | margin-top: 5px;
296 | color: #889;
297 | }
298 |
299 | .result .download {
300 | float: right;
301 | cursor: pointer;
302 | }
303 |
304 | .result .download a {
305 | display: inline-block;
306 | font-size: .8em;
307 | padding: 4px 5px;
308 | background: #889;
309 | color: #fff;
310 | border-radius: 3px;
311 | margin-right: 3px;
312 | text-decoration: none;
313 | transition: background .2s ease;
314 | }
315 |
316 | .result .download a:hover {
317 | background: #667;
318 | }
319 |
320 | .result .hit {
321 | font-size: .9em;
322 | }
323 |
324 | .result .hit em {
325 | font-style: normal;
326 | font-weight: bold;
327 | background: #ffff00;
328 | }
329 |
330 | .result .count p:first-child {
331 | font-weight: bold;
332 | }
333 |
334 | /*
335 | Help
336 | */
337 |
338 | .help ul {
339 | list-style: none;
340 | padding: 0;
341 | }
342 |
--------------------------------------------------------------------------------
/public/font/icons.svg:
--------------------------------------------------------------------------------
1 |
2 |
3 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Elasticsearch Frontend
2 |
3 | []() []() []()
4 |
5 | Simple search interface for large document collections in [Elasticsearch](https://www.elastic.co/de/products/elasticsearch). Made for the exploration and analysis of big document leaks. The application is built with [Express](https://expressjs.com/) and [Pug](https://pugjs.org/). User authentication and protected routes are provided by [Passport](http://passportjs.org/).
6 |
7 | ## History
8 | The initial prototype was built to uncover the tax haven in the free trade zone of Madeira. We used Elasticsearch to build a document search for the [Madeira Gazette](http://www.gov-madeira.pt/joram/). Many of those big PDF files are simple document scans which we wanted to search for person and company names. Read the whole story: [Madeira – A Tax Haven Approved by the European Commission](http://web.br.de/madeira/english/)
9 |
10 | Why build another document search engine? – Because it is super lightweight and customizable. Until we add more features.
11 |
12 | ## Requirements
13 | The application is written in JavaScript. You'll need at least **Node.js v6** to run the application. Check out the [Node.js installation guide](https://nodejs.org/en/download/package-manager/). We use **Elasticsearch 2.4** for document storage and search. For further details, please refer to the [Elasticsearch installation guide](https://www.elastic.co/guide/en/elasticsearch/reference/2.4/_installation.html).
14 |
15 | To check if your Elasticsearch is up and running, call the REST-Interface from the command line:
16 |
17 | ```
18 | $ curl -XGET http://localhost:9200/_cluster/health\?pretty\=1
19 | ```
20 |
21 | If you are seeing an _Unassigned shards_ warning, you might consider setting the number of replicas to 0. This works fine in a development environment:
22 |
23 | ```
24 | $ curl -XPUT 'localhost:9200/_settings' -d '
25 | {
26 | index: {
27 | number_of_replicas : 0
28 | }
29 | }'
30 | ```
31 |
32 | To check if your documents are all in place, run a simple search query on your index:
33 |
34 | ```
35 | $ curl -XGET 'localhost:9200/my-index/_search?q=body:my-query&pretty'
36 | ```
37 |
38 | ## Installation
39 | Installation and configuration are straightforward once Elasticsearch is set up.
40 |
41 | 1. Import documents to Elasticsearch: If you have never done that before, there is another repo dedicated to extracting text from PDF files and importing them to Elasticsearch: [elasticsearch-import-tools](https://github.com/br-data/elasticsearch-import-tools)
42 | 2. Edit the `config/config.development.js` file.
43 | 3. Start the server: `npm start`.
44 | 4. Go to http://localhost:3000. The default username is `user` and the password is `password`.
45 |
46 | ## Searching
47 | There are four different ways to search for whole sentences (full-text) or a single word (term):
48 |
49 | **Standard search** (full-text search): Finds exact word combinations like `John Doe`. Diacritics are ignored and a search for `John Doe` will also find `Jóhñ Döé`.
50 |
51 | **Custom search** (full-text search): By default, the custom search finds all documents that contain `John` AND `Doe`. Supports wildcards and simple search operators:
52 |
53 | - `+` signifies AND operation
54 | - `|` signifies OR operation
55 | - `-` negates a single token
56 | - `"` wraps a number of tokens to signify a phrase for searching
57 | - `*` at the end of a term signifies a prefix query
58 | - `~N` after a word signifies edit distance (fuzziness)
59 | - `~N` after a phrase signifies slop amount
60 |
61 | **Fuzzy search** (term-based search): Finds words, even if they contain a typo or OCR mistake. A search for `Jhon` or `J°hn` will also find `John`.
62 |
63 | **Regex search** (term-based search): Uses Regex patterns like `J.h*` for searching. This Regex will find words such as `John`, `Jahn` and `Johnson`.
64 |
65 | ## Customization
66 | If you want to change the page title and description, simply update the configuration `config/config.development.js`.
67 |
68 | ```
69 | config.page = {
70 | title: 'Document Search',
71 | description: 'Search Elasticsearch documents for persons, companies and addresses.'
72 | };
73 | ```
74 |
75 | ## Authentication
76 | The current authentication strategy is username and password, using [passport-local](https://github.com/jaredhanson/passport-local). Passport provides many different authentication strategies as Express middleware. If you want to change the authentication method, go ahead, check out the [Passport docs](http://passportjs.org/).
77 |
78 | For the ease of development, valid users are stored in the configuration `config/config.development.js`:
79 |
80 | ```javascript
81 | config.users = [
82 | {
83 | id: 1,
84 | username: 'user',
85 | password: '$2a$10$vP0qJyEd0hvvpG5MAaHg9ObUJJpJj9HxINZ/Yqz5nPo5Ms2nhR4r.',
86 | displayName: 'Demo User',
87 | apiToken: '0b414d8433124406be6500833f1672e5'
88 | }
89 | ];
90 | ```
91 |
92 | New password hashes are created using [bcrypt](https://github.com/kelektiv/node.bcrypt.js):
93 |
94 | ```javascript
95 | const bcrypt = require('bcrypt')
96 | const saltRounds = 10
97 | const myPlaintextPassword = 'password'
98 | const salt = bcrypt.genSaltSync(saltRounds)
99 | const passwordHash = bcrypt.hashSync(myPlaintextPassword, salt)
100 |
101 | ```
102 |
103 | Note that the list of users could easily be stored in a database like MongoDB.
104 |
105 | ## API
106 |
107 | ```
108 | curl -H "Authorization: Bearer 0b414d8433124406be6500833f1672e5" http://127.0.0.1:3000/api
109 | curl "http://127.0.0.1:3000/api?access_token=0b414d8433124406be6500833f1672e5"
110 |
111 | curl -H "Authorization: Bearer 0b414d8433124406be6500833f1672e5" "http://localhost:3000/api/search?query=ciboule&type=match&sorting=date"
112 | ```
113 |
114 | ## Deployment
115 | To deploy the application in a live environment, create a new configuration `config/config.production.js`. Update it with all your server information, Elasticsearch host, credentials etc.
116 |
117 | Use the new configuration by starting node with the environment variable set to `production`:
118 |
119 | ```
120 | $ NODE_ENV=production node bin/www
121 | ```
122 |
123 | To keep it running, use a process manager like [forever](https://github.com/foreverjs/forever) or [PM2](https://github.com/Unitech/pm2):
124 |
125 | ```
126 | $ NODE_ENV=production forever start bin/www
127 | ```
128 |
129 | It's advisable to use SSL/TLS encryption for all connections to the server. One way to do this is to route your Node.js application through an Apache or Nginx proxy with HTTPS enabled.
130 |
131 | ## Debugging
132 | The app uses [debug](https://github.com/visionmedia/debug) as its core debugging utility. To put the app into debug mode, set the environment variable `DEBUG`.
133 |
134 | ```
135 | export DEBUG=*
136 | ```
137 |
138 | If you are on a Windows machine, use:
139 |
140 | ```
141 | set DEBUG=*
142 | ```
143 |
144 | ## Planned features
145 | - Add (inline) document viewer
146 | - Add document import and ingestion
147 | - Add direct API access
148 | - Split data retrieval and rendering
149 |
150 | ## Similar projects:
151 | If you are looking for alternatives, check out:
152 | - OCCRP: [Aleph](https://github.com/alephdata/aleph), powering the [Investigative Dashboard](http://data.occrp.org)
153 | - ICIJ: [Datashare](https://datashare.icij.org/)
154 | - EIC: [Hoover](https://hoover.github.io/)
155 | - New York Times: [Stevedore](https://github.com/newsdev/stevedore)
156 | - [DocumentCloud](https://github.com/documentcloud)
157 | - [Open Semantic Search](https://www.opensemanticsearch.org)
158 | - [Overview](https://www.overviewdocs.com/)
159 |
--------------------------------------------------------------------------------