├── LICENSE ├── README.md ├── background.js ├── common.js ├── dexie.js ├── inject.js ├── jquery-3.1.0.js ├── logo.pdn ├── logo_128.png ├── logo_16.png ├── logo_48.png ├── manifest.json ├── popup.html └── popup.js /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2016 Astra West 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ![TweetVacuum logo](https://github.com/T3hUb3rK1tten/TweetVacuum/raw/master/logo_128.png) 2 | # TweetVacuum 3 | Chrome extension to scrape a user's entire timeline, bypassing the Twitter API 3200 tweet limit. 4 | 5 | ## Usage 6 | 7 | Usage is simple. Click the icon in the top right of Chrome. 
Type in the username of the account whose tweets you want to scrape and click start. 8 | 9 | A new window will open up and start searching for tweets by that user. As the script runs, it will save the tweets and hide them on the search page. The search page starts to slow down significantly after ~1000 tweets, so the script will restart the tab and keep going where it left off. 10 | 11 | When it reaches the end of the timeline, make sure to stop the script. Currently it can't detect the difference between no more results and a crashed tab, so it will keep restarting the search until you stop it. 12 | 13 | If stopped, the script will resume from where it left off the next time it is started for the same username. 14 | 15 | ## Installation 16 | 17 | ### Chrome Web Store 18 | The extension is published on the Chrome Web Store but unlisted. You can download it [here](https://chrome.google.com/webstore/detail/tweetvacuum/ieanpikkfcbeakclfkoeccpdcmfapjfl). 19 | 20 | ### Manually 21 | 1. Clone this repo or download it as a ZIP and extract it 22 | 2. Open the Chrome Extensions page 23 | 3. Check "Developer mode" 24 | 4. Click "Load unpacked extension..." 
and select the folder for the repo 25 | -------------------------------------------------------------------------------- /background.js: -------------------------------------------------------------------------------- 1 | chrome.browserAction.onClicked.addListener(function(activeTab) 2 | { 3 | var newURL = chrome.extension.getURL("popup.html"); 4 | chrome.tabs.create({ url: newURL }); 5 | }); -------------------------------------------------------------------------------- /common.js: -------------------------------------------------------------------------------- 1 | // This file is shared by popup and inject for code reuse 2 | 3 | function createDb(username) { 4 | db = new Dexie(username); 5 | db.version(1).stores({ 6 | tweet: 'tweetId, date, text' 7 | }); 8 | db.open(); 9 | return db; 10 | } 11 | 12 | function isoDate(date) { 13 | return date.getUTCFullYear() + "-" + (date.getUTCMonth() + 1) + "-" + date.getUTCDate(); 14 | } 15 | 16 | // http://stackoverflow.com/a/901144 17 | function getParameterByName(name, url) { 18 | if (!url) url = window.location.href; 19 | name = name.replace(/[\[\]]/g, "\\$&"); 20 | var regex = new RegExp("[?&]" + name + "(=([^&#]*)|&|#|$)"), 21 | results = regex.exec(url); 22 | if (!results) return null; 23 | if (!results[2]) return ''; 24 | return decodeURIComponent(results[2].replace(/\+/g, " ")); 25 | } 26 | 27 | // http://stackoverflow.com/a/21535234 28 | function executeScripts(tabId, injectDetailsArray) 29 | { 30 | function createCallback(tabId, injectDetails, innerCallback) { 31 | return function () { 32 | chrome.tabs.executeScript(tabId, injectDetails, innerCallback); 33 | }; 34 | } 35 | 36 | var callback = null; 37 | 38 | for (var i = injectDetailsArray.length - 1; i >= 0; --i) 39 | callback = createCallback(tabId, injectDetailsArray[i], callback); 40 | 41 | if (callback !== null) 42 | callback(); // execute outermost function 43 | } 44 | 45 | function log(msg) { 46 | var endlog = $("#endlog"); 47 | endlog.append(msg).append("
"); 48 | console.log(msg); 49 | } -------------------------------------------------------------------------------- /inject.js: -------------------------------------------------------------------------------- 1 | var loaded = false; 2 | function loop() { 3 | if (loaded) { 4 | items = $(".js-stream-item") 5 | .not("[vacuumed=true]") 6 | .each(function() { 7 | var item = $(this); 8 | 9 | chrome.runtime.sendMessage({ 10 | action: "add", 11 | data: { 12 | tweetId: item.attr("data-item-id"), 13 | date: parseInt(item.find(".js-short-timestamp").attr("data-time-ms")), 14 | text: item.find(".js-tweet-text").text() 15 | } 16 | }); 17 | item.attr("vacuumed", true); 18 | item.hide(); 19 | }); 20 | } else { 21 | if (window.jQuery) { 22 | loaded = true; 23 | 24 | $(".topbar").remove(); 25 | $("#page-container").css("paddingTop", "0px"); 26 | $(".AdaptiveFiltersBar").remove(); 27 | $(".SidebarCommonModules").parent().remove(); 28 | // We need this so that we can scroll to the bottom and trigger Twitter's infinite scroll code 29 | $("#page-container").append($("
", { 30 | style: "height: 10000px; width: 1px;" 31 | })); 32 | 33 | setInterval(scroll, 500); 34 | } 35 | } 36 | 37 | setTimeout(loop, 1000); 38 | } 39 | 40 | // Prefer to use setTimeout to not have multiple occurences of loop() 41 | // running at the same time 42 | setTimeout(loop, 1000); 43 | 44 | var up; 45 | function scroll() { 46 | if (up) { 47 | window.scrollTo(0,0); 48 | } else { 49 | window.scrollTo(0, document.body.scrollHeight); 50 | } 51 | up = !up; 52 | } 53 | 54 | chrome.runtime.onMessage.addListener( 55 | function(request, sender, sendResponse) { 56 | if (request.action == "stop") { 57 | window.close(); 58 | location.href = "about:blank"; 59 | } 60 | }); -------------------------------------------------------------------------------- /logo.pdn: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UberKitten/TweetVacuum/fd13e0de395f428aedef7947a7f156bea3810fd8/logo.pdn -------------------------------------------------------------------------------- /logo_128.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UberKitten/TweetVacuum/fd13e0de395f428aedef7947a7f156bea3810fd8/logo_128.png -------------------------------------------------------------------------------- /logo_16.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UberKitten/TweetVacuum/fd13e0de395f428aedef7947a7f156bea3810fd8/logo_16.png -------------------------------------------------------------------------------- /logo_48.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UberKitten/TweetVacuum/fd13e0de395f428aedef7947a7f156bea3810fd8/logo_48.png -------------------------------------------------------------------------------- /manifest.json: -------------------------------------------------------------------------------- 1 | 
{ 2 | "name": "TweetVacuum", 3 | "description": "Sucks up a bunch of tweets from the Twitter search tool, bypassing the 3200 API limit", 4 | "version": "1.0", 5 | "background" : { 6 | "scripts": ["background.js"], 7 | "persistent": false 8 | }, 9 | "browser_action": { 10 | "default_title": "Tweets, man" 11 | }, 12 | "icons": { "16": "logo_16.png", 13 | "48": "logo_48.png", 14 | "128": "logo_128.png" }, 15 | "permissions": [ 16 | "declarativeContent", 17 | "tabs", 18 | "storage", 19 | "unlimitedStorage", 20 | "https://twitter.com/" 21 | ], 22 | "manifest_version": 2 23 | } -------------------------------------------------------------------------------- /popup.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Tweet Vacuum 5 | 23 | 24 | 25 | 26 | 27 | 28 | 29 |
30 |
31 |
32 |
33 |

TweetVacuum

34 | Github
35 | By Astra West 36 |
37 | 38 | 39 |
40 |

41 | 42 |

43 |
44 | 45 | 46 |
47 |
48 | 49 | -------------------------------------------------------------------------------- /popup.js: -------------------------------------------------------------------------------- 1 | var username, db; 2 | var windowId, tabId; 3 | var timeout, count; 4 | var dostop = false; 5 | 6 | function start(e) { 7 | dostop = false; 8 | $("#start").hide(); 9 | $("#stop").show(); 10 | 11 | username = $("#username").val().toLowerCase(); 12 | log("Running for user: " + username); 13 | db = createDb(username); 14 | 15 | newSearch(); 16 | } 17 | 18 | function newSearch() { 19 | if (!dostop) { 20 | db.tweet 21 | .orderBy("date") 22 | .limit(1) 23 | .toArray() 24 | .then(function(tweet) { 25 | if (tweet[0]) { 26 | var date = new Date(tweet[0].date); 27 | log("Found existing data, latest date " + isoDate(date)); 28 | date.setDate(date.getDate() + 1) 29 | launchSearch(date); 30 | } else { 31 | var date = new Date(); 32 | date.setDate(date.getDate() + 1) 33 | launchSearch(date); 34 | } 35 | }) 36 | .catch(function() {}); 37 | } 38 | } 39 | 40 | function launchSearch(lastDate) { 41 | log("Searching for tweets before " + isoDate(lastDate)); 42 | 43 | var query = "from:" + username + " until:" + isoDate(lastDate); 44 | var url = "https://twitter.com/search?f=tweets&q=" + encodeURIComponent(query); 45 | 46 | if (windowId) { 47 | chrome.tabs.create({ 48 | url: url, 49 | windowId: windowId 50 | }, inject); 51 | } else { 52 | log("Opening new work window"); 53 | chrome.windows.create({ 54 | url: url, 55 | }, function(newwindow) { 56 | windowId = newwindow.id; 57 | inject(newwindow.tabs[0]); 58 | }); 59 | } 60 | 61 | var endlog = $("#endlog"); 62 | endlog.append("Current count: ").append("0").append("
"); 63 | count = 0; 64 | } 65 | 66 | function inject(tab) { 67 | if (tabId) { 68 | chrome.tabs.remove(tabId); 69 | } 70 | tabId = tab.id; 71 | 72 | executeScripts(tabId, [ 73 | { file: "jquery-3.1.0.js" }, 74 | { file: "inject.js"} 75 | ]); 76 | 77 | timeout = setTimeout(searchFailure, 10000); 78 | } 79 | 80 | chrome.runtime.onMessage.addListener( 81 | function(request, sender, sendResponse) { 82 | if (request.action == "add") { 83 | clearTimeout(timeout); 84 | timeout = setTimeout(searchFailure, 10000); 85 | 86 | count++; 87 | $(".count").last().text(count); 88 | 89 | db.tweet.add(request.data).catch(function (error) { 90 | console.log(error); 91 | }); 92 | } 93 | }); 94 | 95 | function searchFailure() { 96 | log("Tab crashed or failed, restarting"); 97 | newSearch(); 98 | } 99 | 100 | function stop(e) { 101 | dostop = true; 102 | $("#start").show(); 103 | $("#stop").hide(); 104 | 105 | log("Stopping"); 106 | chrome.tabs.sendMessage(tabId, { 107 | action: "stop" 108 | }); 109 | clearTimeout(timeout); 110 | tabId = ""; 111 | windowId = ""; 112 | } 113 | 114 | function downloadDb() { 115 | username = $("#username").val().toLowerCase(); 116 | db = createDb(username); 117 | log("Exporting data as JSON"); 118 | db.tweet 119 | .toArray(function(data) { 120 | var blob = new Blob([JSON.stringify(data)], {type: "application/json"}); 121 | $("#downloadAnchor").attr("href", URL.createObjectURL(blob)); 122 | $("#downloadAnchor").attr("download", username + ".json"); 123 | $("#downloadAnchor")[0].click(); 124 | log(data.length + " rows exported"); 125 | }); 126 | } 127 | 128 | function deleteDb() { 129 | username = $("#username").val().toLowerCase(); 130 | Dexie.delete(username); 131 | log("Database deleted for " + username); 132 | } 133 | 134 | $(document).ready(function() { 135 | $("#start").click(start); 136 | $("#stop").click(stop); 137 | $("#delete").click(deleteDb); 138 | $("#download").click(downloadDb); 139 | $('#form').submit(function () { 140 | return false; 141 | 
}); 142 | }); 143 | --------------------------------------------------------------------------------