├── .gitignore ├── LICENSE ├── README.md ├── Vagrantfile └── app └── server ├── app.js ├── bin └── www ├── config ├── config.json_template └── token.json_template ├── lib ├── gmail-helper.js └── zotero-helper.js ├── package.json └── setup.js /.gitignore: -------------------------------------------------------------------------------- 1 | app/server/config/*.json 2 | .vagrant 3 | logs 4 | *.log 5 | pids 6 | *.pid 7 | *.seed 8 | lib-cov 9 | coverage 10 | .grunt 11 | .lock-wscript 12 | build/Release 13 | node_modules 14 | bower_components 15 | *.sublime-workspace 16 | 17 | 18 | # Compiled source # 19 | ################### 20 | *.com 21 | *.class 22 | *.dll 23 | *.exe 24 | *.o 25 | *.so 26 | 27 | # Packages # 28 | ############ 29 | # it's better to unpack these files and commit the raw source 30 | # git has its own built in compression methods 31 | *.7z 32 | *.dmg 33 | *.gz 34 | *.iso 35 | *.jar 36 | *.rar 37 | *.tar 38 | *.zip 39 | 40 | # Logs and databases # 41 | ###################### 42 | *.log 43 | *.sql 44 | *.sqlite 45 | 46 | # OS generated files # 47 | ###################### 48 | .DS_Store 49 | .DS_Store? 50 | ._* 51 | .Spotlight-V100 52 | .Trashes 53 | ehthumbs.db 54 | Thumbs.db -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2015 Florian Glatz 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | 23 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # zotero-gmail-scholar 2 | Add journal articles discovered through Google Scholar's "Keyword Alert Service" directly to Zotero. Google Scholar lets you [observe keywords](https://scholar.google.com/scholar_alerts?view_op=list_alerts) and be notified when journal articles are discovered that contain some of the words. 3 | Unfortunately, Scholar only let's you push those notifications to your email address. Adding them to Zotero is tedious - it can, should and therefore will be automated thanks to the magic of APIs! 4 | 5 | # Installation 6 | ## On the Server 7 | vagrant up && vagrant ssh # set up vagrant box and log in 8 | cd /app/server && npm install # install npm dependencies 9 | chmod +x bin/www # make executable 10 | ./bin/www # start the app 11 | ## Google API access 12 | 1. Go to [Google Developer Console](https://code.google.com/apis/console/?hl=de&pli=1) 13 | 2. Go to APIs and Authentication 14 | 3. Activate Gmail API 15 | 4. Create new OAuthClient() --> save client, secret and return address 16 | 17 | ## How does it work? 18 | This software changes nothing about the fundamental workings of notifications in Google Scholar; but: the software automatically extracts the relevant information (metadata) regarding the recommendations sent by Goole Scholar. Those are then written to the Zotero Sync Server! 19 | -------------------------------------------------------------------------------- /Vagrantfile: -------------------------------------------------------------------------------- 1 | # -*- mode: ruby -*- 2 | # vi: set ft=ruby : 3 | 4 | # All Vagrant configuration is done below. The "2" in Vagrant.configure 5 | # configures the configuration version (we support older styles for 6 | # backwards compatibility). Please don't change it unless you know what 7 | # you're doing. 8 | Vagrant.configure(2) do |config| 9 | # The most common configuration options are documented and commented below. 10 | # For a complete reference, please see the online documentation at 11 | # https://docs.vagrantup.com. 12 | 13 | # Every Vagrant development environment requires a box. You can search for 14 | # boxes at https://atlas.hashicorp.com/search. 15 | config.vm.box = "ubuntu/trusty64" 16 | 17 | # Disable automatic box update checking. If you disable this, then 18 | # boxes will only be checked for updates when the user runs 19 | # `vagrant box outdated`. This is not recommended. 20 | # config.vm.box_check_update = false 21 | 22 | # Create a forwarded port mapping which allows access to a specific port 23 | # within the machine from a port on the host machine. In the example below, 24 | # accessing "localhost:8080" will access port 80 on the guest machine. 25 | # config.vm.network "forwarded_port", guest: 80, host: 8080 26 | config.vm.network "forwarded_port", guest: 3000, host: 3000 27 | 28 | # Create a private network, which allows host-only access to the machine 29 | # using a specific IP. 30 | # config.vm.network "private_network", ip: "192.168.33.10" 31 | config.vm.network "private_network", type: "dhcp" 32 | # https://github.com/mitchellh/vagrant/issues/3083 33 | 34 | # Create a public network, which generally matched to bridged network. 35 | # Bridged networks make the machine appear as another physical device on 36 | # your network. 37 | # config.vm.network "public_network" 38 | 39 | # Share an additional folder to the guest VM. The first argument is 40 | # the path on the host to the actual folder. The second argument is 41 | # the path on the guest to mount the folder. And the optional third 42 | # argument is a set of non-required options. 43 | # config.vm.synced_folder "../data", "/vagrant_data" 44 | config.vm.synced_folder "app/server", "/app/server", 45 | :nfs => true, 46 | :mount_options => ['actimeo=2'] 47 | config.vm.synced_folder "app/client", "/app/client" 48 | config.vm.synced_folder "app/shared", "/app/shared" 49 | 50 | # Provider-specific configuration so you can fine-tune various 51 | # backing providers for Vagrant. These expose provider-specific options. 52 | # Example for VirtualBox: 53 | # 54 | # config.vm.provider "virtualbox" do |vb| 55 | # # Display the VirtualBox GUI when booting the machine 56 | # vb.gui = true 57 | # 58 | # # Customize the amount of memory on the VM: 59 | # vb.memory = "1024" 60 | # end 61 | # 62 | # View the documentation for the provider you are using for more 63 | # information on available options. 64 | 65 | # Define a Vagrant Push strategy for pushing to Atlas. Other push strategies 66 | # such as FTP and Heroku are also available. See the documentation at 67 | # https://docs.vagrantup.com/v2/push/atlas.html for more information. 68 | # config.push.define "atlas" do |push| 69 | # push.app = "YOUR_ATLAS_USERNAME/YOUR_APPLICATION_NAME" 70 | # end 71 | 72 | # Enable provisioning with a shell script. Additional provisioners such as 73 | # Puppet, Chef, Ansible, Salt, and Docker are also available. Please see the 74 | # documentation for more information about their specific syntax and use. 75 | config.vm.provision "shell", inline: <<-SHELL 76 | locale-gen de_DE.UTF-8 77 | dpkg-reconfigure locales 78 | apt-get update 79 | apt-get install -y npm nodejs git 80 | update-alternatives --install /usr/bin/node node /usr/bin/nodejs 10 81 | npm install -g npm 82 | npm cache clean 83 | SHELL 84 | end 85 | -------------------------------------------------------------------------------- /app/server/app.js: -------------------------------------------------------------------------------- 1 | var _ = require('lodash'); 2 | var jf = require('jsonfile'); 3 | 4 | var zotero = require('./lib/zotero-helper'); 5 | var gmail = require('./lib/gmail-helper'); 6 | 7 | var config = jf.readFileSync('config/config.json'); 8 | var collections = config.zotero.collections; // collection for incoming files 9 | 10 | gmail.getItems(_.keys(collections)) 11 | .on('items', function (items, keyword) { 12 | console.log(keyword, items); 13 | // zotero.saveItems(items, collections[keyword]); 14 | }); -------------------------------------------------------------------------------- /app/server/bin/www: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #!/usr/bin/env node 3 | 4 | DEBUG=zotero:* node app.js -------------------------------------------------------------------------------- /app/server/config/config.json_template: -------------------------------------------------------------------------------- 1 | { 2 | "credentials": { 3 | "zotero": { 4 | "user": "", 5 | "key": "" 6 | } 7 | }, 8 | "lastSync": null, 9 | "zotero": { 10 | "collections": { 11 | "exampleKeyword1": ["QXF4QX3Q"], 12 | "exampleKeyword2": ["S49HD2CE"] 13 | } 14 | }, 15 | "gmail": { 16 | "client": "", 17 | "secret": "", 18 | "redirect": "http://localhost:3000/" 19 | }, 20 | "languages": { 21 | "whitelist": ["english", "german"], 22 | "confidence": 0.5, 23 | "excludeOnLowConfidence": false 24 | } 25 | } -------------------------------------------------------------------------------- /app/server/config/token.json_template: -------------------------------------------------------------------------------- 1 | { 2 | "access_token": "", 3 | "token_type": "Bearer", 4 | "refresh_token": "", 5 | "expiry_date": 0 6 | } -------------------------------------------------------------------------------- /app/server/lib/gmail-helper.js: -------------------------------------------------------------------------------- 1 | var _ = require('lodash'); 2 | var Promise = require('bluebird'); 3 | var sanitize = require('sanitize-filename'); 4 | var escape = require('escape-html'); 5 | var events = require('events'); 6 | var open = require('open'); 7 | var base64 = require('js-base64').Base64; 8 | var LanguageDetect = require('languagedetect'); 9 | var cheerio = require('cheerio'); 10 | var jf = require('jsonfile'); 11 | var config = jf.readFileSync('config/config.json'); 12 | var http = require('http'); 13 | var urlParser = require('url'); 14 | 15 | var google = require('googleapis'); 16 | var OAuth2 = google.auth.OAuth2; 17 | var readline = require('readline'); 18 | var Gmail = require('node-gmail-api'); 19 | 20 | function createClient() { 21 | return new OAuth2(config.gmail.client, config.gmail.secret, config.gmail.redirect); 22 | } 23 | 24 | function createToken() { 25 | return new Promise(function (resolve, reject) { 26 | var oauth2Client = createClient(); 27 | var scope = 'https://www.googleapis.com/auth/gmail.readonly'; 28 | 29 | // Generate URL 30 | var url = oauth2Client.generateAuthUrl({ 31 | access_type: 'offline', // 'online' (default) or 'offline' (gets refresh_token) 32 | scope: scope // If you only need one scope you can pass it as string 33 | }); 34 | 35 | // Start interactive mode in console 36 | var rl = readline.createInterface({ 37 | input: process.stdin, 38 | output: process.stdout 39 | }); 40 | 41 | console.log('Visit the url: ', url); 42 | open(url); // open browser window if possible (not working with vagrant :/) 43 | 44 | // start http server for oauth redirect 45 | var server = http.createServer(function (req, res) { 46 | var query = urlParser.parse(req.url, true).query; 47 | 48 | res.writeHead(200, {'Content-Type': 'text/html'}); 49 | res.end('Copy & Paste into Terminal Session: '); 50 | }).listen(3000); 51 | 52 | // command line interaction 53 | rl.question('Enter the code here:', function(code) { 54 | oauth2Client.getToken(code, function(err, token) { 55 | oauth2Client.setCredentials(token); 56 | jf.writeFileSync('config/token.json', token); 57 | resolve(token); 58 | rl.close(); // stop interactive mode 59 | server.close(); // stop http server 60 | }); 61 | }); 62 | }); 63 | } 64 | 65 | function getToken() { 66 | var token = jf.readFileSync('config/token.json'); 67 | 68 | if (token && token.expiry_date > new Date().getTime()) // expiration *should* be handled by google #missing 69 | return new Promise(function (resolve) { resolve(token) }); 70 | else 71 | return createToken(); 72 | } 73 | 74 | // e.g. filename($(e).text(), 'pdf'); 75 | function filename(string, extension) { 76 | return sanitize(string).replace(/ /g,"_").substring(0,65).toLowerCase() + '.' + extension; 77 | } 78 | 79 | function updateLastSync () { 80 | var now = new Date(); 81 | 82 | jf.writeFileSync('config/config.json', _.extend(config, { 83 | lastSync: [now.getFullYear(), now.getMonth(), now.getDay()].join('-') 84 | })); 85 | } 86 | 87 | function getItems(keywords) { 88 | var eventEmitter = new events.EventEmitter; 89 | 90 | getToken() 91 | .then(function (token) { 92 | var gmail = new Gmail(token.access_token); 93 | 94 | _.each(keywords, function (keyword) { 95 | var query = [ 96 | 'label:all', 97 | 'from:scholaralerts-noreply@google.com', 98 | 'subject:'+keyword, 99 | ]; 100 | 101 | if(config.lastSync) query.push('after:' + config.lastSync); 102 | 103 | var msg = gmail.messages(query.join(' AND '), {max: 100}); 104 | 105 | msg 106 | .on('data', function (d) { 107 | var body = base64.decode(d.payload.body.data.replace(/-/g, '+').replace(/_/g, '/')), // google-specific (cf. SO) 108 | $ = cheerio.load(body), 109 | items = _($('h3>a')) 110 | // put in right format 111 | .map(function (e) { 112 | return { 113 | title: $(e).text(), 114 | url: e.attribs.href 115 | }; 116 | }) 117 | // remove unwanted foreign languages 118 | .reduce(function (result, item) { 119 | var lngDetector = new LanguageDetect(), 120 | lng = lngDetector.detect(item.title, 1), 121 | c = config.languages; 122 | 123 | if (_.indexOf(lng[0], c.whitelist) || (!c.excludeOnLowConfidence && confidence < c.confidence)) { 124 | result.push(item); 125 | return result; 126 | } 127 | else return result; 128 | }, []); 129 | 130 | // emit batches 131 | eventEmitter.emit('items', items, keyword); 132 | 133 | // emit single items 134 | _.each(items, function (item) { 135 | eventEmitter.emit('item', item, keyword); 136 | }); 137 | 138 | updateLastSync(); 139 | }); 140 | }); 141 | }); 142 | 143 | return eventEmitter; 144 | } 145 | 146 | // module API 147 | exports = module.exports = { 148 | getItems: getItems 149 | } -------------------------------------------------------------------------------- /app/server/lib/zotero-helper.js: -------------------------------------------------------------------------------- 1 | // libs 2 | var _ = require('lodash'); 3 | var jf = require('jsonfile'); 4 | var crypto = require('crypto'); 5 | 6 | var zotero = require('zotero'); // https://github.com/inukshuk/zotero-node 7 | var Promise = require('bluebird'); 8 | var FormData = require('form-data'); 9 | 10 | var cheerio = require('cheerio'); 11 | var LanguageDetect = require('languagedetect'); 12 | 13 | // config 14 | var config = jf.readFileSync('config/config.json'); 15 | var credentials = config.credentials; 16 | 17 | // zotero objects 18 | var client = new zotero.Client; 19 | var lib = new zotero.Library({ 20 | user: credentials.zotero.user, 21 | key: credentials.zotero.key 22 | }); 23 | // var stream = new zotero.Stream({ apiKey: key }); 24 | 25 | // templates 26 | var templates = { 27 | attachment: { 28 | itemType: 'attachment', 29 | 30 | // parentItem: 'ABCD2345'"', 31 | linkMode: 'imported_url', 32 | // contentType: 'application/pdf', 33 | tags: [] 34 | }, 35 | journalArticle: { 36 | itemType: 'journalArticle', 37 | 38 | abstractNote: '', 39 | publicationTitle: '', 40 | volume: '', 41 | issue: '', 42 | pages: '', 43 | date: '', 44 | series: '', 45 | seriesTitle: '', 46 | seriesText: '', 47 | journalAbbreviation: '', 48 | language: '', 49 | DOI: '', 50 | ISSN: '', 51 | shortTitle: '', 52 | url: '', 53 | accessDate: '', 54 | archive: '', 55 | archiveLocation: '', 56 | libraryCatalog: '', 57 | callNumber: '', 58 | rights: '', 59 | extra: '' 60 | } 61 | }; 62 | 63 | // config 64 | client.persist = true; // make the client re-use the TCP connection to the server 65 | zotero.promisify(Promise.promisify.bind(Promise)); // make zotero use promises instead of callbacks 66 | 67 | /* 68 | */ 69 | function toItems(items, collections) { 70 | return _.map(items, function (item) { 71 | return _.extend({}, templates.journalArticle, item, { collections: collections }); 72 | }) 73 | } 74 | 75 | /* 76 | * Create Virtual Files (itemType=attachment) on Zotero Server 77 | */ 78 | function createFiles (items) { 79 | return new Promise(function (resolve, reject) { 80 | // error handling 81 | if(!_.isArray(items) && _.isObject(items)) 82 | items = [items]; 83 | else if (!_.isArray(items)) { 84 | reject('createFiles() expects array or object as argument'); 85 | return false; 86 | } 87 | 88 | client.post('/users/' + credentials.zotero.user + '/items', {key: credentials.zotero.key}, items) 89 | .then(function (resp) { 90 | var data = resp.data, 91 | itemKey; 92 | 93 | if(_.isEmpty(data.success)) 94 | reject(data); 95 | else 96 | resolve(data.success); 97 | }) 98 | .catch(function (err) { 99 | reject(err); 100 | }); 101 | }); 102 | } 103 | 104 | function saveItems(items, collections) { 105 | var fn = _.throttle(createFiles, config.zotero.throttle); 106 | 107 | return fn(toItems(items, collections)); 108 | } 109 | 110 | /* 111 | * Play around 112 | // */ 113 | // var file = { 114 | // // filename: 'doc.pdf', 115 | // url: 'http://example.com/doc.pdf', 116 | // title: 'My Document' 117 | // } 118 | 119 | // createFiles(_.extend(templates.journalArticle, file)) 120 | // // client.get('/itemTypeFields', {itemType: 'journalArticle'}) 121 | // .then(function (itemKey) { 122 | // console.log('success', itemKey); 123 | // }) 124 | // .catch(function (err) { 125 | // console.log('error', err); 126 | // }) 127 | 128 | // var form = new FormData() 129 | // form.append('my_field', 'my value'); 130 | // form.append('my_buffer', new Buffer(10)); 131 | // form.append('my_file', file); 132 | // form.submit('http://api.zotero.org/user/', function(err, res) { 133 | // // res – response object (http.IncomingMessage) // 134 | // res.resume(); // for node-0.10.x 135 | // }); 136 | 137 | // helpers 138 | function checksum (stream) { 139 | return new Promise(function (resolve, reject) { 140 | var hash = crypto.createHash('md5'); 141 | 142 | // error 143 | stream.on('error', function () { 144 | reject(hash.digest('hex')); 145 | }); 146 | 147 | // update hash 148 | stream.on('data', function (data) { 149 | hash.update(data, 'utf8') 150 | }); 151 | 152 | // return hash 153 | stream.on('end', function () { 154 | resolve(hash.digest('hex')); 155 | }); 156 | }); 157 | } 158 | 159 | 160 | // module API 161 | exports = module.exports = { 162 | saveItems: saveItems 163 | }; -------------------------------------------------------------------------------- /app/server/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "zotero_automator", 3 | "version": "1.0.0", 4 | "description": "", 5 | "main": "app.js", 6 | "scripts": { 7 | "test": "echo \"Error: no test specified\" && exit 1" 8 | }, 9 | "author": "", 10 | "license": "ISC", 11 | "dependencies": { 12 | "bluebird": "^2.9.25", 13 | "bunyan": "^1.3.5", 14 | "cheerio": "^0.19.0", 15 | "crypto": "0.0.3", 16 | "escape-html": "^1.0.1", 17 | "form-data": "^0.2.0", 18 | "googleapis": "^2.0.4", 19 | "js-base64": "^2.1.8", 20 | "jsonfile": "^2.0.0", 21 | "languagedetect": "^1.1.1", 22 | "lodash": "^3.9.1", 23 | "node-gmail-api": "^0.3.1", 24 | "open": "0.0.5", 25 | "passport": "^0.2.2", 26 | "passport-google-oauth": "^0.2.0", 27 | "sanitize-filename": "^1.3.0", 28 | "zotero": "^0.2.1" 29 | } 30 | } 31 | -------------------------------------------------------------------------------- /app/server/setup.js: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flotob/zotero-gmail-scholar/1edd777c1f51d6712c540b40918d16fbf8252ab6/app/server/setup.js --------------------------------------------------------------------------------