├── .gitignore ├── README.md └── app.js /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | data/ 3 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Node.js Slack Channel Scraper 2 | 3 | The following code pulls a full channel history from the Slack API, exports 4 | the message and user data to .json files, then generates an HTML mockup of 5 | the history in a similar style to Slack. 6 | 7 | ### Prerequisites 8 | 9 | An installation of Node.js is required in order to run this Slack scraper. Download the packaged installer directly from [nodejs.org](https://nodejs.org/) or if you're using a Mac, you can install it with Homebrew. 10 | 11 | Open a terminal window and run: 12 | 13 | ``` 14 | $ brew install node 15 | ``` 16 | 17 | ### Running the code 18 | 19 | To use, set the `token` variable to your legacy user token; 20 | _(Issue a legacy token from the [Slack API Help Center](https://api.slack.com/custom-integrations/legacy-tokens))_ 21 | 22 | then set the `channel` variable to the internal channel id of the channel you want to scrape. 23 | _(Find a channel's id with the test tool in the [Slack API](https://api.slack.com/methods/channels.list/test) documentation)_ 24 | 25 | Finally, in the console, `cd` into the directory and run: 26 | 27 | ``` 28 | $ node app.js 29 | ``` 30 | 31 | then open `data/history.html` in a browser. -------------------------------------------------------------------------------- /app.js: -------------------------------------------------------------------------------- 1 | 2 | // 3 | // Node.js Slack Channel Scraper 4 | // 5 | // 6 | // The following code pulls a full channel history from the Slack API, exports 7 | // the message and user data to .json files, then generates an HTML mockup of 8 | // the history in a similar style to Slack. 
9 | // 10 | // To use, set the `token` variable to your legacy user token; 11 | // (Issue a legacy token at https://api.slack.com/custom-integrations/legacy-tokens) 12 | // 13 | // then set the `channel` variable to the internal channel id of the channel you want to scrape. 14 | // (Find a channel's id with the Slack API at https://api.slack.com/methods/channels.list/test) 15 | // 16 | // Finally just run `node app.js` in the console and open `data/history.html`. 17 | // 18 | 19 | const https = require('https'); 20 | const fs = require('fs'); 21 | 22 | const token = 'INSERT_TOKEN_HERE'; 23 | const channel = 'INSERT_CHANNEL_ID_HERE'; 24 | 25 | let users; 26 | let messages; 27 | 28 | let messagesAPIData = []; 29 | 30 | function createDirectory() { 31 | return new Promise(resolve => { 32 | if (!fs.existsSync('./data')) { 33 | fs.mkdir('./data', (err) => { 34 | if(err) console.log(err); 35 | 36 | resolve(); 37 | }); 38 | } else { 39 | resolve(); 40 | } 41 | }); 42 | } 43 | 44 | function getMessages(timestamp) { 45 | return new Promise(resolve => { 46 | https.get(`https://slack.com/api/channels.history?token=${token}&channel=${channel}&pretty=1&latest=${timestamp || ''}`, (res) => { 47 | const {statusCode} = res; 48 | const contentType = res.headers['content-type']; 49 | 50 | let error; 51 | if (statusCode !== 200) { 52 | error = new Error('Request Failed.\n' + `Status Code: ${statusCode}`); 53 | } else if (!/^application\/json/.test(contentType)) { 54 | error = new Error('Invalid content-type.\n' + `Expected application/json but received ${contentType}`); 55 | } 56 | if (error) { 57 | console.error(error.message); 58 | // consume response data to free up memory 59 | res.resume(); 60 | return; 61 | } 62 | 63 | res.setEncoding('utf8'); 64 | let rawData = ''; 65 | res.on('data', (chunk) => { 66 | rawData += chunk; 67 | }); 68 | res.on('end', () => { 69 | try { 70 | let parsedData = JSON.parse(rawData); 71 | messagesAPIData = messagesAPIData.concat(parsedData.messages); 72 
| 73 | if (parsedData.has_more) { 74 | resolve(getMessages(parsedData.messages[parsedData.messages.length - 1].ts)); 75 | } else { 76 | messages = messagesAPIData; 77 | 78 | fs.writeFile('./data/messages.json', JSON.stringify(messages), (err) => { 79 | if (err) console.log(err); 80 | 81 | console.log('> Successfully downloaded and wrote Messages to messages.json'); 82 | resolve(); 83 | }); 84 | } 85 | } catch (e) { 86 | console.error(e.message); 87 | } 88 | }); 89 | }).on('error', (e) => { 90 | console.error(`Got error: ${e.message}`); 91 | }); 92 | }); 93 | } 94 | 95 | function getUsers() { 96 | return new Promise(resolve => { 97 | https.get(`https://slack.com/api/users.list?token=${token}&pretty=1`, (res) => { 98 | const {statusCode} = res; 99 | const contentType = res.headers['content-type']; 100 | 101 | let error; 102 | if (statusCode !== 200) { 103 | error = new Error('Request Failed.\n' + `Status Code: ${statusCode}`); 104 | } else if (!/^application\/json/.test(contentType)) { 105 | error = new Error('Invalid content-type.\n' + `Expected application/json but received ${contentType}`); 106 | } 107 | if (error) { 108 | console.error(error.message); 109 | // consume response data to free up memory 110 | res.resume(); 111 | return; 112 | } 113 | 114 | res.setEncoding('utf8'); 115 | let rawData = ''; 116 | res.on('data', (chunk) => { 117 | rawData += chunk; 118 | }); 119 | res.on('end', () => { 120 | try { 121 | users = JSON.parse(rawData).members; 122 | 123 | fs.writeFile('./data/users.json', JSON.stringify(users), (err) => { 124 | if (err) console.log(err); 125 | 126 | console.log('> Successfully downloaded and wrote Users to users.json'); 127 | resolve(); 128 | }); 129 | } catch (e) { 130 | console.error(e.message); 131 | } 132 | }); 133 | }).on('error', (e) => { 134 | console.error(`Got error: ${e.message}`); 135 | }); 136 | }); 137 | } 138 | 139 | function setUsersAndMessages() { 140 | return new Promise(resolve => { 141 | fs.readFile('./data/messages.json', 
'utf-8', (err, data) => { 142 | if (err) { 143 | if (err.code === 'ENOENT') { 144 | return resolve(false); 145 | } else { 146 | console.log(err); 147 | } 148 | } 149 | 150 | messages = JSON.parse(data); 151 | messages = messages.messages; 152 | 153 | fs.readFile('./data/users.json', 'utf-8', (err, data) => { 154 | if (err) { 155 | if (err.code === 'ENOENT') { 156 | return resolve(false); 157 | } else { 158 | console.log(err); 159 | } 160 | } 161 | 162 | users = JSON.parse(data); 163 | users = users.members; 164 | 165 | resolve(true); 166 | }); 167 | }); 168 | }); 169 | } 170 | 171 | function checkExistingFiles(exist) { 172 | return new Promise(resolve => { 173 | if(exist) { 174 | console.log('> Local files found, proceeding with local data'); 175 | resolve(); 176 | } else { 177 | console.log('> No local files found, retrieving data from Slack'); 178 | createDirectory() 179 | .then(getMessages) 180 | .then(getUsers) 181 | .then(() => resolve()); 182 | } 183 | }); 184 | } 185 | 186 | function formatUsers() { 187 | return new Promise(resolve => { 188 | const newUsers = {}; 189 | let idx = 0, 190 | id; 191 | 192 | users.forEach(user => { 193 | idx++; 194 | 195 | id = user.id; 196 | newUsers[id] = user; 197 | 198 | if(idx === users.length) { 199 | users = newUsers; 200 | resolve(); 201 | } 202 | }); 203 | }); 204 | } 205 | 206 | function createHTML(messages) { 207 | return new Promise(resolve => { 208 | process.stdout.write('\n> Messages successfully parsed\n'); 209 | 210 | const writeStream = fs.createWriteStream('./data/history.html', { encoding: 'utf8' }); 211 | writeStream.write(` 212 | 213 |
214 | 215 |