├── .gitignore ├── README.adoc ├── protractor ├── README.adoc ├── amazon-affiliate.js ├── mufg.js ├── package.json ├── protractor.conf.js ├── rakuten.js └── smbc.js └── webdriverio ├── README.adoc ├── mufg.js ├── package.json └── runner.js /.gitignore: -------------------------------------------------------------------------------- 1 | node_modules/ 2 | -------------------------------------------------------------------------------- /README.adoc: -------------------------------------------------------------------------------- 1 | = example-scrape-websites 2 | 3 | Example scripts using http://angular.github.io/protractor/[Protractor] and http://webdriver.io/[WebdriverIO] to scrape websites. 4 | 5 | For details, look into the corresponding directories. 6 | 7 | * link:/protractor/[Protractor] 8 | * link:/webdriverio/[WebdriverIO] 9 | -------------------------------------------------------------------------------- /protractor/README.adoc: -------------------------------------------------------------------------------- 1 | = Scraping with Protractor 2 | 3 | Scrape websites using http://angular.github.io/protractor/[Protractor]. 4 | 5 | npm install 6 | npm run scrape-mufg 7 | npm run scrape-rakuten 8 | 9 | == MUFG 10 | 11 | Scrapes http://direct.bk.mufg.jp/[三菱東京UFJ銀行] and prints current balance and stores last month's details to mufg-__YYYY__-__mm__.tsv. 12 | 13 | Required environment variables: 14 | 15 | * `MUFG_ID` 16 | * `MUFG_PASSWORD` 17 | 18 | == Rakuten 19 | 20 | Scrapes https://www.rakuten-bank.co.jp/[楽天銀行] and prints current balance and stores last month's details to rakuten-__YYYY__-__mm__.tsv.
21 | 22 | Required environment variables: 23 | 24 | * `RAKUTEN_ID` 25 | * `RAKUTEN_PASSWORD` 26 | * `RAKUTEN_IMAP_ID` 27 | ** Gmail IMAP account 28 | * `RAKUTEN_IMAP_PASSWORD` 29 | ** Gmail IMAP password 30 | 31 | * `RAKUTEN_QUESTIONS_{no}` 32 | ** Questions and answers which are displayed when logging in rakuten bank 33 | ** Separated by tab (\\t) character 34 | 35 | === Example 36 | 37 | ---- 38 | export RAKUTEN_QUESTIONS_1='出身地は?(tab)...' 39 | export RAKUTEN_QUESTIONS_2='初めて飼ったペットの名前は?(tab)...' 40 | export RAKUTEN_QUESTIONS_3='所有している車は?(tab)...' 41 | ---- 42 | -------------------------------------------------------------------------------- /protractor/amazon-affiliate.js: -------------------------------------------------------------------------------- 1 | var fs = require('fs'); 2 | 3 | describe('amazon-affiliate', function () { 4 | browser.ignoreSynchronization = true; 5 | browser.get('https://affiliate.amazon.co.jp/gp/associates/network/main.html'); 6 | 7 | it('ログイン', function () { 8 | $('#ap_signin_existing_radio').click(); 9 | $('input[name="email"]') .sendKeys(browser.params.amazon.email); 10 | $('input[name="password"]').sendKeys(browser.params.amazon.password); 11 | 12 | $('#signInSubmit').click(); 13 | }); 14 | 15 | it('今月の速報値を表示', function () { 16 | $$('#mini-report .line-item, #mini-report .line-item-total').each(function (lineElem) { 17 | lineElem.getText().then(function (text) { 18 | console.log(text.replace(/\n/, '\t')); 19 | }); 20 | }); 21 | }); 22 | 23 | it('レポートへ', function () { 24 | element(by.linkText('レポート全体を表示')).click(); 25 | }); 26 | 27 | it('レポートをダウンロード', function () { 28 | $('option[value="ordersReport"]').click(); 29 | $('input[name="submit.download_CSV"]').click(); // 画面上に表示されているテキストでは TSV … 30 | browser.pause(); 31 | }); 32 | }); 33 | -------------------------------------------------------------------------------- /protractor/mufg.js: -------------------------------------------------------------------------------- 1 | var fs = 
require('fs'); 2 | 3 | var lastMonth = new Date(); 4 | lastMonth.setMonth(lastMonth.getMonth() - 1); 5 | 6 | var outFilePath = [ 7 | 'mufg-', lastMonth.getFullYear(), '-', (lastMonth.getMonth() + 101).toString().substr(-2), '.tsv' 8 | ].join(''); 9 | 10 | function $x (xpath) { 11 | return element(by.xpath(xpath)); 12 | } 13 | 14 | describe('mufg', function () { 15 | browser.ignoreSynchronization = true; 16 | browser.get('https://entry11.bk.mufg.jp/ibg/dfw/APLIN/loginib/login?_TRANID=AA000_001'); 17 | 18 | it('ログイン', function () { 19 | $('input[name="KEIYAKU_NO"]').sendKeys(browser.params.mufg.id); 20 | $('input[name="PASSWORD"]') .sendKeys(browser.params.mufg.password); 21 | $('[alt="ログイン"]').click(); 22 | }); 23 | 24 | it('お知らせがあったら読む', function () { 25 | function readInformationIfAny() { 26 | browser.getCurrentUrl().then(function (url) { 27 | if (/InformationIchiranShoukaiMidoku\.do/.exec(url)) { 28 | var information = $x('//table[@class="data"]/tbody[1]/tr'); 29 | 30 | information.getText().then(console.log); 31 | information.element(by.buttonText('表示')).click(); 32 | 33 | $('[alt="トップページへ"]').click().then(readInformationIfAny); 34 | } 35 | }); 36 | } 37 | 38 | readInformationIfAny(); 39 | }); 40 | 41 | it('ログイン完了 - 明細へ', function () { 42 | $('#setAmountDisplay').getText().then(function (amount) { 43 | console.log('残高: ' + amount); 44 | }); 45 | $x('//a[img[@alt="入出金明細をみる"]]').click(); 46 | }); 47 | 48 | it('先月の明細を見る', function () { 49 | $('input#last_month').click(); 50 | $x('//button[img[@alt="照会"]]').click(); 51 | }); 52 | 53 | it('ファイルに書き出す', function () { 54 | $$('#no_memo table tr').map(function (tr) { 55 | return tr.all(by.css('td')).map(function (td) { return td.getText() }); 56 | }).then(function (rows) { 57 | var content = rows.filter(function (cols) { 58 | return cols.length === 5; 59 | }).map(function (cols) { 60 | return cols.map(function (col) { return col.replace(/\s+/g, ' ') }).join('\t'); 61 | }).join('\n'); 62 | 63 | 
fs.writeFileSync(outFilePath, content); 64 | console.log('wrote: ' + outFilePath); 65 | }); 66 | }); 67 | }); 68 | -------------------------------------------------------------------------------- /protractor/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "protractor-scrape-money", 3 | "version": "0.0.0", 4 | "description": "", 5 | "main": "index.js", 6 | "scripts": { 7 | "postinstall": "webdriver-manager update", 8 | "scrape-mufg": "protractor --specs mufg.js", 9 | "scrape-rakuten": "protractor --specs rakuten.js", 10 | "scrape-smbc": "tsc smbc.ts && protractor --specs smbc.js", 11 | "scrape-amazon-affiliate": "protractor --specs amazon-affiliate.js" 12 | }, 13 | "author": "motemen ", 14 | "license": "MIT", 15 | "private": true, 16 | "dependencies": { 17 | "iconv": "^2.2.0", 18 | "inbox": "^1.1.59", 19 | "protractor": "^1.3.0" 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /protractor/protractor.conf.js: -------------------------------------------------------------------------------- 1 | exports.config = { 2 | capabilities: { 3 | browserName: 'chrome', 4 | chromeOptions: { 5 | 'excludeSwitches': ['ignore-certificate-errors'], 6 | prefs: { 7 | download: { 8 | prompt_for_download: false, 9 | default_directory: process.cwd() + '/downloads' 10 | }, 11 | } 12 | } 13 | }, 14 | 15 | specs: ['*.js'], 16 | 17 | jasmineNodeOpts: { 18 | isVerbose: true, 19 | showColors: true, 20 | defaultTimeoutInterval: 30000, 21 | includeStackTrace: false 22 | }, 23 | 24 | onPrepare: function () { 25 | jasmine.getEnv().afterEach(function () { 26 | var spec = jasmine.getEnv().currentSpec; 27 | if (spec.results().failedCount > 0) { 28 | browser.pause(); 29 | } 30 | }); 31 | }, 32 | 33 | params: { 34 | mufg: { 35 | id: process.env.MUFG_ID, 36 | password: process.env.MUFG_PASSWORD 37 | }, 38 | rakuten: { 39 | id: process.env.RAKUTEN_ID, 40 | password: 
process.env.RAKUTEN_PASSWORD, 41 | questions: (function () { 42 | var questions = []; 43 | for (var i = 1; process.env['RAKUTEN_QUESTIONS_' + i]; ++i) { 44 | var qa = process.env['RAKUTEN_QUESTIONS_' + i].split(/\t+/); 45 | if (qa.length !== 2) { 46 | break; 47 | } 48 | questions.push([new RegExp(qa[0]), qa[1]]); 49 | } 50 | return questions; 51 | })(), 52 | imap: { 53 | server: 'imap.gmail.com', 54 | id: process.env.RAKUTEN_IMAP_ID, 55 | password: process.env.RAKUTEN_IMAP_PASSWORD 56 | } 57 | }, 58 | smbc: { 59 | account: process.env.SMBC_ACCOUNT, 60 | password: process.env.SMBC_PASSWORD 61 | }, 62 | amazon: { 63 | email: process.env.AMAZON_EMAIL, 64 | password: process.env.AMAZON_PASSWORD 65 | } 66 | } 67 | }; 68 | -------------------------------------------------------------------------------- /protractor/rakuten.js: -------------------------------------------------------------------------------- 1 | var fs = require('fs'); 2 | var inbox = require('inbox'); 3 | var iconv = require('iconv'); 4 | var converter = new iconv.Iconv("ISO-2022-JP", "UTF-8"); 5 | 6 | var lastMonth = new Date(); 7 | lastMonth.setMonth(lastMonth.getMonth() - 1); 8 | 9 | var outFilePath = [ 10 | 'rakuten-', lastMonth.getFullYear(), '-', (lastMonth.getMonth() + 101).toString().substr(-2), '.tsv' 11 | ].join(''); 12 | 13 | // ワンタイムキー取得用プロミス 14 | var oneTimeKeyPromise; 15 | var oneTimeKey; 16 | 17 | describe('rakuten', function () { 18 | browser.ignoreSynchronization = true; 19 | browser.driver.get('https://fes.rakuten-bank.co.jp/MS/main/RbS?CurrentPageID=START&&COMMAND=LOGIN'); 20 | 21 | it('IMAPサーバにログインしてワンタイムキー通知メールを取得できるようイベント監視', function () { 22 | // http://ayapi.github.io/posts/observingimaponnode/ 23 | // http://liginc.co.jp/web/service/facebook/153850 24 | 25 | var deferred = protractor.promise.defer(); 26 | oneTimeKeyPromise = deferred.promise; 27 | 28 | var imap = inbox.createConnection( 29 | false, browser.params.rakuten.imap.server, { 30 | secureConnection: true, 31 | auth: { 32 | 
user: browser.params.rakuten.imap.id, 33 | pass: browser.params.rakuten.imap.password 34 | } 35 | } 36 | ); 37 | 38 | imap.on('connect', function() { 39 | console.log('connected'); 40 | imap.openMailbox('INBOX', function(error){ 41 | if(error) throw error; 42 | }); 43 | }); 44 | 45 | imap.on('new', function(message) { 46 | if (message.from.address !== 'service@ac.rakuten-bank.co.jp') { 47 | console.log('this message is not from rakuten bank. skip.'); 48 | console.log(message.title); 49 | console.log(message.from.address); 50 | return; 51 | } 52 | var body = ''; 53 | var stream = imap.createMessageStream(message.UID); 54 | stream.on("data", function(chunk) { 55 | body += chunk; 56 | }); 57 | stream.on("end", function() { 58 | body = converter.convert(body).toString(); 59 | // FIXME: body にはヘッダ部も含まれているため RFC822 に則ってちゃんとパースする? 60 | if (/ワンタイムキー[  ]*[::][  ]*([a-zA-Z0-9]+)/.test(body)) { 61 | var otKey = RegExp.$1; 62 | console.log('ワンタイムキーを本文から取得成功:' + otKey); 63 | deferred.fulfill(otKey); 64 | } else { 65 | console.log('ワンタイムキーを本文から取得失敗'); 66 | deferred.reject(); 67 | } 68 | }); 69 | }); 70 | 71 | imap.connect(); 72 | }); 73 | 74 | it('楽天銀行ログイン', function () { 75 | $('.user_id').sendKeys(browser.params.rakuten.id); 76 | $('.login_password').sendKeys(browser.params.rakuten.password); 77 | $('[value="ログイン"]').click(); 78 | }); 79 | 80 | it('ワンタイムキーを発行する', function () { 81 | $('[src="/rb/fes/img/common/btn_onetime.gif"]').element(by.xpath('..')).click(); 82 | }); 83 | 84 | it('IMAPサーバからワンタイムキーを取得', function () { 85 | browser.driver.wait(function () { 86 | return oneTimeKeyPromise; 87 | }, 60 * 1000, 'ワンタイムキーパスワード記載のメールを1分待つ') 88 | .then(function (otKey) { 89 | console.log('then(): otKey = ' + otKey); 90 | oneTimeKey = otKey; 91 | expect(typeof oneTimeKey).toBe('string'); 92 | expect(oneTimeKey).not.toBe(''); 93 | }, function (err) { 94 | console.log('Promise failure: '); 95 | console.log(err); 96 | }); 97 | }, 60 * 1000); 98 | 99 | it('ワンタイムキーを入力してログインする', function () { 
100 | $('.security_code').sendKeys(oneTimeKey); 101 | $('[value="一時解除実行"]').click(); 102 | }); 103 | 104 | it('本人確認', function () { 105 | element(by.cssContainingText('div', 'ご本人確認のため、以下の認証情報を入力してください。')) 106 | .isPresent().then(function (b) { 107 | console.log('本人確認テキストが存在したか? = ' + b); 108 | if (!b) return; 109 | // 「質問」 110 | $('#INPUT_FORM > table.margintop20 > tbody > tr:nth-child(2) > td > table > tbody > tr:nth-child(1) > td > div') 111 | .getText().then(function (questionText) { 112 | expect(typeof questionText).toBe('string'); 113 | expect(questionText).not.toBe(''); 114 | console.log('質問:' + questionText); 115 | // 「合言葉」 116 | var $answer = $('#INPUT_FORM > table.margintop20 > tbody > tr:nth-child(2) > td > table > tbody > tr:nth-child(2) > td > div > input'); 117 | var myQuestions = browser.params.rakuten.questions; 118 | var i; 119 | for (i = 0; i < myQuestions.length; i++) { 120 | if (myQuestions[i][0].test(questionText)) { 121 | console.log('合言葉を入力:' + myQuestions[i][1]); 122 | $answer.sendKeys(myQuestions[i][1]); 123 | break; 124 | } 125 | } 126 | console.log('合言葉の入力を終了'); 127 | if (i >= myQuestions.length) { 128 | fail('最後まで見つからなかった'); 129 | } 130 | $('[value="次 へ"]').click(); 131 | console.log('次へ - クリック'); 132 | }); 133 | }); 134 | }); 135 | 136 | it('お知らせ', function () { 137 | var $next = $('[value=" 次へ (MyAccount) "]'); 138 | $next.isPresent().then(function (b) { 139 | console.log('次へボタンが存在したか? 
= ' + b); 140 | if (!b) return; 141 | $next.click(); 142 | }); 143 | }); 144 | 145 | it('先月の明細を取得', function () { 146 | element(by.linkText('入出金明細')).click(); 147 | }); 148 | 149 | it('ファイルに書き出す', function () { 150 | // 「最新の入出金明細(最大50件・24ヶ月以内)」 151 | $$('body > center:nth-child(4) > table > tbody > tr > td > table > tbody > tr > td > div.innerbox00 > table tr').map(function (tr) { 152 | return tr.all(by.css('td')).map(function (td) { return td.getText() }); 153 | }).then(function (rows) { 154 | var content = rows.filter(function (cols) { 155 | return cols.length === 4; 156 | }).map(function (cols) { 157 | return cols.map(function (col) { return col.replace(/\s+/g, ' ') }).join('\t'); 158 | }).join('\n'); 159 | 160 | fs.writeFileSync(outFilePath, content); 161 | console.log('wrote: ' + outFilePath); 162 | }); 163 | }); 164 | }); 165 | -------------------------------------------------------------------------------- /protractor/smbc.js: -------------------------------------------------------------------------------- 1 | function $x(xpath) { 2 | return element(by.xpath(xpath)); 3 | } 4 | describe('smbc', function () { 5 | browser.ignoreSynchronization = true; 6 | browser.get('https://direct.smbc.co.jp/aib/aibgsjsw5001.jsp'); 7 | var params = browser.params.smbc; 8 | it('ログイン', function () { 9 | $('input[name="S_BRANCH_CD"]').sendKeys(params.account.split(/-/)[0]); 10 | $('input[name="S_ACCNT_NO"]').sendKeys(params.account.split(/-/)[1]); 11 | $('input[name="PASSWORD"]').sendKeys(params.password); 12 | $('input[value="ログイン"]').click(); 13 | }); 14 | it('お知らせがあったら読む', function () { 15 | browser.getCurrentUrl().then(function (url) { 16 | if (url === 'https://direct3.smbc.co.jp/servlet/com.smbc.SUPRedirectServlet') { 17 | $('input[value="確認して次へ"]').click(); 18 | } 19 | }); 20 | }); 21 | it('ログイン完了', function () { 22 | $('.balance .fRight').getText().then(function (balance) { 23 | console.log('残高: ' + balance); 24 | $x('//a[.="明細照会"]').click(); 25 | }); 26 | }); 27 | it('', 
function () { 28 | browser.pause(); 29 | }); 30 | }); 31 | -------------------------------------------------------------------------------- /webdriverio/README.adoc: -------------------------------------------------------------------------------- 1 | = Scraping with WebdriverIO 2 | 3 | Scrape websites using http://webdriver.io/[WebdriverIO]. 4 | 5 | npm install 6 | npm run scrape-mufg 7 | 8 | == MUFG 9 | 10 | Scrapes http://direct.bk.mufg.jp/[三菱東京UFJ銀行] and prints current balance and stores last month's details to __YYYY__-__mm__.tsv. 11 | 12 | Required environment variables: 13 | 14 | * `MUFG_ID` 15 | * `MUFG_PASSWORD` 16 | 17 | == Known issues 18 | 19 | * MUFG: cannot proceed when the site forces user to read notifications 20 | -------------------------------------------------------------------------------- /webdriverio/mufg.js: -------------------------------------------------------------------------------- 1 | var webdriverio = require('webdriverio'), 2 | async = require('async'), 3 | fs = require('fs'); 4 | 5 | var lastMonth = new Date(); 6 | lastMonth.setMonth(lastMonth.getMonth() - 1); 7 | 8 | var outFilePath = [ 9 | 'mufg-', lastMonth.getFullYear(), '-', (lastMonth.getMonth() + 101).toString().substr(-2), '.tsv' 10 | ].join(''); 11 | 12 | var options = { 13 | host: 'localhost', 14 | port: 4444, 15 | desiredCapabilities: { 16 | // browserName: 'chrome', 17 | chromeOptions: { 18 | 'excludeSwitches': ['ignore-certificate-errors'] 19 | } 20 | } 21 | }; 22 | 23 | var WAIT = 5 * 1000; 24 | 25 | var client = webdriverio.remote(options).init(); 26 | 27 | client 28 | .url('https://entry11.bk.mufg.jp/ibg/dfw/APLIN/loginib/login?_TRANID=AA000_001') 29 | .addValue('[name="KEIYAKU_NO"]', process.env.MUFG_ID) 30 | .addValue('[name="PASSWORD"]', process.env.MUFG_PASSWORD) 31 | .click('[alt="ログイン"]') 32 | 33 | .on('readNotifications', function () { 34 | client.url(function (err, res) { 35 | if (/InformationIchiranShoukaiMidoku\.do/.exec(res.value)) { 36 | client 37 | 
.waitFor('//table[@class="data"]/tbody[1]/tr', WAIT) 38 | .element('//table[@class="data"]/tbody[1]/tr', function (err, tr) { 39 | client.elementIdText(tr.ELEMENT, function (err, text) { 40 | console.log(text); 41 | }); 42 | // click 43 | client.click('[alt="トップページへ"]') 44 | .emit('readNotifications') 45 | }) 46 | } 47 | }) 48 | }) 49 | .emit('readNotifications') 50 | 51 | .waitFor('#setAmountDisplay', WAIT) 52 | .getText('#setAmountDisplay', function (err, res) { 53 | console.log('残高: ' + res); 54 | }) 55 | .click('[alt="入出金明細をみる"]') 56 | 57 | .waitFor('input#last_month', WAIT) 58 | .click('input#last_month') 59 | .click('[alt="照会"]') 60 | 61 | .waitFor('#no_memo table tr', WAIT) 62 | .elements('#no_memo table tr', function (err, rows) { 63 | async.map(rows.value, function (row, cb) { 64 | client.elementIdElements(row.ELEMENT, 'td', function (err, cols) { 65 | async.map(cols.value, function (td, cb) { 66 | client.elementIdText(td.ELEMENT, cb); 67 | }, cb); 68 | }); 69 | }, function (err, rows) { 70 | var content = rows.filter(function (cols) { 71 | return cols.length === 5; 72 | }).map(function (cols) { 73 | return cols.map(function (td) { return td.value.replace(/\s+/g, ' ') }).join('\t'); 74 | }).join('\n'); 75 | 76 | fs.writeFileSync(outFilePath, content); 77 | console.log('wrote: ' + outFilePath); 78 | }); 79 | }) 80 | 81 | .end(); 82 | -------------------------------------------------------------------------------- /webdriverio/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "webdriverio-scrape-money", 3 | "version": "0.0.0", 4 | "description": "", 5 | "main": "index.js", 6 | "scripts": { 7 | "scrape-mufg": "node runner.js mufg.js" 8 | }, 9 | "author": "motemen ", 10 | "license": "MIT", 11 | "private": true, 12 | "dependencies": { 13 | "webdriverio": "^2.2.3", 14 | "async": "^0.9.0", 15 | "selenium-standalone": "^2.43.1-2.9.0-1" 16 | } 17 | } 18 | 
// /webdriverio/runner.js
//
// Launches a local Selenium standalone server, then runs the script named on
// the command line (package.json invokes it as `node runner.js mufg.js`) as a
// child `node` process sharing this process's stdio. When the child finishes,
// the Selenium server is stopped and a failing child makes this process exit
// with a non-zero status as well.
var selenium = require('selenium-standalone');
var spawn = require('child_process').spawn;

// The child script is started after this fixed delay; nothing here checks
// that the server is actually accepting connections by then.
var SERVER_STARTUP_DELAY_MS = 1000;

var server = selenium({ stdio: 'inherit' }, []);

// Stops the Selenium server and records a non-zero exit status so that
// `npm run scrape-*` reports failure when the scrape script failed.
function shutdown(exitCode) {
  if (exitCode !== 0) {
    process.exitCode = exitCode;
  }
  server.kill();
}

setTimeout(function () {
  var command = spawn('node', process.argv.slice(2), { stdio: 'inherit' });

  // Without an 'error' listener a failed spawn is thrown from the emitter,
  // which would end this process without stopping the Selenium server.
  command.on('error', function (err) {
    console.error('runner: could not start script: ' + err.message);
    shutdown(1);
  });

  // `code` is null when the child was terminated by a signal.
  command.on('close', function (code) {
    shutdown(code === null ? 1 : code);
  });
}, SERVER_STARTUP_DELAY_MS);