├── LICENSE
├── README.md
├── background.js
├── common.js
├── dexie.js
├── inject.js
├── jquery-3.1.0.js
├── logo.pdn
├── logo_128.png
├── logo_16.png
├── logo_48.png
├── manifest.json
├── popup.html
└── popup.js
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2016 Astra West
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | 
2 | # TweetVacuum
3 | Chrome extension to scrape a user's entire timeline, bypassing the Twitter API 3200 tweet limit.
4 |
5 | ## Usage
6 |
7 | Usage is simple. Click the icon in the top right of Chrome. Type in the username of the account whose timeline you want to scrape and click start.
8 |
9 | A new window will open up and start searching for tweets by that user. As the script runs, it will save the tweets and remove them from the search page. The search page starts to slow down significantly after ~1000 tweets, so the script will restart the tab and keep going where it left off.
10 |
11 | When it reaches the end of the timeline, make sure to stop the script. Currently it can't detect the difference between no more results and a crashed tab.
12 |
13 | The script will resume from where it left off if stopped.
14 |
15 | ## Installation
16 |
17 | ### Chrome Web Store
18 | The extension is published on the Chrome Web Store but unlisted. You can download it [here](https://chrome.google.com/webstore/detail/tweetvacuum/ieanpikkfcbeakclfkoeccpdcmfapjfl).
19 |
20 | ### Manually
21 | 1. Clone this repo or download it as a ZIP and extract it
22 | 2. Open the Chrome Extensions page
23 | 3. Check "Developer mode"
24 | 4. Click "Load unpacked extension..." and select the folder for the repo
25 |
--------------------------------------------------------------------------------
/background.js:
--------------------------------------------------------------------------------
// Clicking the extension's toolbar button opens popup.html in a new tab.
chrome.browserAction.onClicked.addListener(function(activeTab) {
    chrome.tabs.create({
        url: chrome.extension.getURL("popup.html")
    });
});
--------------------------------------------------------------------------------
/common.js:
--------------------------------------------------------------------------------
1 | // This file is shared by popup and inject for code reuse
2 |
/**
 * Creates the Dexie database named after `username`, declares its schema
 * and starts opening it.
 *
 * Version 1 has a single `tweet` store described by the Dexie schema string
 * 'tweetId, date, text'.
 *
 * @param {string} username - Used verbatim as the database name.
 * @returns {Dexie} The database handle (returned before open() completes).
 */
function createDb(username) {
    // Keep the handle local: assigning an undeclared `db` here would create
    // an implicit global (and throws a ReferenceError in strict mode).
    var database = new Dexie(username);
    database.version(1).stores({
        tweet: 'tweetId, date, text'
    });
    // Report a failed open instead of leaving the rejection unobserved.
    database.open().catch(function (error) {
        console.error("Failed to open database for " + username, error);
    });
    return database;
}
11 |
/**
 * Formats the UTC calendar day of a Date as yyyy-mm-dd.
 *
 * Month and day are zero-padded to two digits so the result is a proper
 * ISO 8601 calendar date (for example "2016-01-05" rather than "2016-1-5").
 *
 * @param {Date} date - The instant to format; only its UTC date part is used.
 * @returns {string} The date as "yyyy-mm-dd".
 */
function isoDate(date) {
    function pad(value) {
        return (value < 10 ? "0" : "") + value;
    }

    return date.getUTCFullYear() + "-" + pad(date.getUTCMonth() + 1) + "-" + pad(date.getUTCDate());
}
15 |
16 | // http://stackoverflow.com/a/901144
/**
 * Reads one query-string parameter out of a URL.
 *
 * @param {string} name - Parameter name to look up (matched literally).
 * @param {string} [url] - URL to search; defaults to window.location.href.
 * @returns {?string} The decoded value, '' when the parameter is present
 *     without a value, or null when it is absent.
 */
function getParameterByName(name, url) {
    var source = url ? url : window.location.href;
    // Escape every regex metacharacter so the name is matched literally.
    var escapedName = name.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
    // The value stops at the next '&' or '#'; without that restriction the
    // capture would run on through all following parameters and the fragment.
    var regex = new RegExp("[?&]" + escapedName + "(=([^&#]*)|&|#|$)");
    var results = regex.exec(source);
    if (!results) return null;
    if (!results[2]) return '';
    return decodeURIComponent(results[2].replace(/\+/g, " "));
}
26 |
27 | // http://stackoverflow.com/a/21535234
/**
 * Injects several scripts into a tab one after another: each
 * chrome.tabs.executeScript call is given the next injection as its
 * callback, and the last one is given null.
 *
 * @param {number} tabId - Tab to inject into.
 * @param {Array<Object>} injectDetailsArray - One details object per script,
 *     in the order they should run.
 */
function executeScripts(tabId, injectDetailsArray)
{
    // Fold from the last entry backwards so that every step is built with
    // the step that follows it as its completion callback.
    var runAll = injectDetailsArray.reduceRight(function (runNext, injectDetails) {
        return function () {
            chrome.tabs.executeScript(tabId, injectDetails, runNext);
        };
    }, null);

    // An empty list leaves nothing to start.
    if (runAll !== null) {
        runAll();
    }
}
44 |
/**
 * Appends a message to the on-page log element (#endlog) and mirrors it to
 * the browser console.
 *
 * @param {*} msg - Content to show; handed to jQuery's append() as-is.
 */
function log(msg) {
    var endlog = $("#endlog");
    // NOTE(review): the separator literal was split across two lines (an
    // unterminated string, i.e. a syntax error). "<br>" is assumed to be the
    // intended line-break markup - confirm against the page.
    endlog.append(msg).append("<br>");
    console.log(msg);
}
--------------------------------------------------------------------------------
/inject.js:
--------------------------------------------------------------------------------
// Becomes true once jQuery is available and the page has been prepared.
var loaded = false;

/**
 * One pass of the scraping loop; re-arms itself with a one second timer.
 *
 * Until jQuery is available nothing is done. The first pass that finds
 * jQuery strips page chrome, appends a tall spacer element and starts the
 * periodic scroll(). Every later pass reports each result item that is not
 * yet marked `vacuumed` through an "add" message, then marks and hides it.
 */
function loop() {
    if (loaded) {
        $(".js-stream-item")
            .not("[vacuumed=true]")
            .each(function() {
                var item = $(this);

                chrome.runtime.sendMessage({
                    action: "add",
                    data: {
                        tweetId: item.attr("data-item-id"),
                        // Explicit radix: the attribute is read as a decimal number
                        date: parseInt(item.find(".js-short-timestamp").attr("data-time-ms"), 10),
                        text: item.find(".js-tweet-text").text()
                    }
                });
                // Mark the item so it is reported only once
                item.attr("vacuumed", true);
                item.hide();
            });
    } else if (window.jQuery) {
        loaded = true;

        $(".topbar").remove();
        $("#page-container").css("paddingTop", "0px");
        $(".AdaptiveFiltersBar").remove();
        $(".SidebarCommonModules").parent().remove();
        // We need this so that we can scroll to the bottom and trigger Twitter's infinite scroll code
        // NOTE(review): the element literal was split across two lines (a
        // syntax error); "<div>" is assumed to be the intended spacer - confirm.
        $("#page-container").append($("<div>", {
            style: "height: 10000px; width: 1px;"
        }));

        setInterval(scroll, 500);
    }

    setTimeout(loop, 1000);
}
39 |
// Start the first pass with a one-shot timer. loop() re-arms itself when it
// finishes, so there are never multiple occurrences of loop() running at
// the same time.
window.setTimeout(loop, 1000);
43 |
// Direction of the next jump: falsy means "go to the bottom", truthy means
// "go back to the top". Starts out undefined, so the first jump goes down.
var up;

/**
 * Jumps the window to the top or to the bottom of the document,
 * alternating between the two on every call.
 */
function scroll() {
    var targetY = up ? 0 : document.body.scrollHeight;
    window.scrollTo(0, targetY);
    up = !up;
}
53 |
// On a "stop" message, close this window and point it at a blank page.
// Any other message is ignored.
chrome.runtime.onMessage.addListener(function(request, sender, sendResponse) {
    if (request.action != "stop") {
        return;
    }

    window.close();
    location.href = "about:blank";
});
--------------------------------------------------------------------------------
/logo.pdn:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UberKitten/TweetVacuum/fd13e0de395f428aedef7947a7f156bea3810fd8/logo.pdn
--------------------------------------------------------------------------------
/logo_128.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UberKitten/TweetVacuum/fd13e0de395f428aedef7947a7f156bea3810fd8/logo_128.png
--------------------------------------------------------------------------------
/logo_16.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UberKitten/TweetVacuum/fd13e0de395f428aedef7947a7f156bea3810fd8/logo_16.png
--------------------------------------------------------------------------------
/logo_48.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UberKitten/TweetVacuum/fd13e0de395f428aedef7947a7f156bea3810fd8/logo_48.png
--------------------------------------------------------------------------------
/manifest.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "TweetVacuum",
3 | "description": "Sucks up a bunch of tweets from the Twitter search tool, bypassing the 3200 API limit",
4 | "version": "1.0",
5 | "background" : {
6 | "scripts": ["background.js"],
7 | "persistent": false
8 | },
9 | "browser_action": {
10 | "default_title": "Tweets, man"
11 | },
12 | "icons": { "16": "logo_16.png",
13 | "48": "logo_48.png",
14 | "128": "logo_128.png" },
15 | "permissions": [
16 | "declarativeContent",
17 | "tabs",
18 | "storage",
19 | "unlimitedStorage",
20 | "https://twitter.com/"
21 | ],
22 | "manifest_version": 2
23 | }
--------------------------------------------------------------------------------
/popup.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
Tweet Vacuum
5 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
48 |
49 |
--------------------------------------------------------------------------------
/popup.js:
--------------------------------------------------------------------------------
// Lower-cased account name taken from the #username field, and the
// database handle returned by createDb() for it.
var username;
var db;

// Ids of the work window and of the tab currently running the search.
var windowId;
var tabId;

// Handle of the pending searchFailure timer, and the number of "add"
// messages received since the current search was launched.
var timeout;
var count;

// Set by stop(); newSearch() does nothing while it is true.
var dostop = false;
5 |
/**
 * Click handler for #start: begins (or resumes) scraping for the account
 * typed into #username.
 *
 * A blank username is rejected before any state changes; otherwise the
 * Start/Stop buttons are swapped, the database for the lower-cased username
 * is opened through createDb(), and the first search is launched.
 *
 * @param {Event} e - Click event (unused).
 */
function start(e) {
    var requested = $("#username").val();
    // Without a username the search has nothing to match, and the restart
    // timer would keep relaunching it.
    if (typeof requested !== "string" || requested.trim() === "") {
        log("Please enter a username before starting");
        return;
    }

    dostop = false;
    $("#start").hide();
    $("#stop").show();

    username = requested.toLowerCase();
    log("Running for user: " + username);
    db = createDb(username);

    newSearch();
}
17 |
/**
 * Launches the next search, unless a stop was requested.
 *
 * Reads the first stored tweet in `date` index order. If there is one, the
 * search resumes from the day after that tweet's date; otherwise it starts
 * from the day after today.
 * NOTE(review): Dexie's orderBy() is ascending by default, which would make
 * that first row the oldest stored tweet - confirm.
 *
 * Failures are written to the log instead of being silently discarded.
 */
function newSearch() {
    if (dostop) {
        return;
    }

    db.tweet
        .orderBy("date")
        .limit(1)
        .toArray()
        .then(function(tweets) {
            var date;
            if (tweets[0]) {
                date = new Date(tweets[0].date);
                log("Found existing data, latest date " + isoDate(date));
            } else {
                date = new Date();
            }
            // Advance by one day in UTC, the same calendar isoDate() formats
            // in, so the "until:" day does not depend on the local time zone.
            date.setUTCDate(date.getUTCDate() + 1);
            launchSearch(date);
        })
        .catch(function(error) {
            log("Could not start search: " + error);
        });
}
39 |
/**
 * Opens a Twitter search for tweets from `username` dated before `lastDate`
 * and hands the resulting tab to inject().
 *
 * The search opens as a new tab in the existing work window when one is
 * known, otherwise in a freshly created window whose id is remembered. A
 * new "Current count" line is appended to the log and the counter is reset.
 *
 * @param {Date} lastDate - Upper bound used for the "until:" operator.
 */
function launchSearch(lastDate) {
    log("Searching for tweets before " + isoDate(lastDate));

    var query = "from:" + username + " until:" + isoDate(lastDate);
    var url = "https://twitter.com/search?f=tweets&q=" + encodeURIComponent(query);

    if (windowId) {
        chrome.tabs.create({
            url: url,
            windowId: windowId
        }, inject);
    } else {
        log("Opening new work window");
        chrome.windows.create({
            url: url
        }, function(newwindow) {
            windowId = newwindow.id;
            inject(newwindow.tabs[0]);
        });
    }

    var endlog = $("#endlog");
    // NOTE(review): these literals were split across lines (a syntax error).
    // The counter element is rebuilt with class "count", which is what the
    // "add" message handler selects to update the number; the exact original
    // tag and the trailing line break are assumptions - confirm.
    endlog.append("Current count: ").append('<span class="count">0</span>').append("<br>");
    count = 0;
}
65 |
/**
 * Makes `tab` the active work tab: closes the previous work tab if there is
 * one, injects jQuery and inject.js into the new tab, and arms the 10 second
 * timer that calls searchFailure().
 *
 * @param {Object} tab - Tab object provided by the chrome tabs/windows API.
 */
function inject(tab) {
    // Close the tab used by the previous search, if any
    tabId && chrome.tabs.remove(tabId);
    tabId = tab.id;

    var scripts = ["jquery-3.1.0.js", "inject.js"].map(function (file) {
        return { file: file };
    });
    executeScripts(tabId, scripts);

    timeout = setTimeout(searchFailure, 10000);
}
79 |
// Handles "add" messages: each one restarts the 10 second searchFailure
// timer, bumps the counter shown in the newest ".count" element and stores
// the tweet. A failed add() is only written to the console.
chrome.runtime.onMessage.addListener(function(request, sender, sendResponse) {
    if (request.action != "add") {
        return;
    }

    // A tweet arrived, so the search is alive: restart the failure timer
    clearTimeout(timeout);
    timeout = setTimeout(searchFailure, 10000);

    ++count;
    $(".count").last().text(count);

    var pendingAdd = db.tweet.add(request.data);
    pendingAdd.catch(function (error) {
        console.log(error);
    });
});
94 |
/**
 * Timer callback used when no tweet has arrived for 10 seconds: logs the
 * failure and starts a new search.
 */
function searchFailure() {
    var notice = "Tab crashed or failed, restarting";
    log(notice);
    newSearch();
}
99 |
/**
 * Click handler for #stop: halts scraping.
 *
 * Sets the stop flag, swaps the Start/Stop buttons, cancels the pending
 * restart timer, tells the work tab (when there is one) to stop, and
 * forgets the work tab and window ids.
 *
 * @param {Event} e - Click event (unused).
 */
function stop(e) {
    dostop = true;
    $("#start").show();
    $("#stop").hide();

    log("Stopping");
    // Cancel the restart timer first so it cannot fire while stopping.
    clearTimeout(timeout);
    // Only message a tab that exists. NOTE(review): chrome.tabs.sendMessage
    // is expected to throw on an empty tab id, which would skip the cleanup
    // below - confirm.
    if (tabId) {
        chrome.tabs.sendMessage(tabId, {
            action: "stop"
        });
    }
    tabId = "";
    windowId = "";
}
113 |
/**
 * Click handler for #download: exports every stored tweet of the account
 * typed into #username as a JSON file.
 *
 * The rows are serialized into a Blob, and the hidden #downloadAnchor link
 * is pointed at it and clicked so that the file is saved as
 * "<username>.json".
 */
function downloadDb() {
    username = $("#username").val().toLowerCase();
    db = createDb(username);
    log("Exporting data as JSON");

    db.tweet.toArray(function(rows) {
        var json = JSON.stringify(rows);
        var blob = new Blob([json], {type: "application/json"});

        var anchor = $("#downloadAnchor");
        anchor.attr("href", URL.createObjectURL(blob));
        anchor.attr("download", username + ".json");
        anchor[0].click();

        log(rows.length + " rows exported");
    });
}
127 |
/**
 * Click handler for #delete: deletes the database of the account typed into
 * #username.
 *
 * Success is logged only once Dexie.delete() has settled, and a failure is
 * logged instead of being left as an unhandled rejection.
 */
function deleteDb() {
    username = $("#username").val().toLowerCase();
    // Capture the name: the shared `username` may change before the
    // deletion settles.
    var target = username;
    Dexie.delete(target)
        .then(function() {
            log("Database deleted for " + target);
        })
        .catch(function(error) {
            log("Failed to delete database for " + target + ": " + error);
        });
}
133 |
// Once the DOM is ready, wire each button to its handler and stop the form
// from submitting.
$(function() {
    var clickHandlers = {
        "#start": start,
        "#stop": stop,
        "#delete": deleteDb,
        "#download": downloadDb
    };

    $.each(clickHandlers, function(selector, handler) {
        $(selector).click(handler);
    });

    $('#form').submit(function () {
        return false;
    });
});
143 |
--------------------------------------------------------------------------------