├── .gitignore ├── README.md └── app.js /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | data/ 3 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Node.js Slack Channel Scraper 2 | 3 | The following code pulls a full channel history from the Slack API, exports 4 | the message and user data to .json files, then generates an HTML mockup of 5 | the history in a similar style to Slack. 6 | 7 | ### Prerequisites 8 | 9 | An installation of Node.js is required in order to run this Slack scraper. Download the packaged installer directly from [nodejs.org](https://nodejs.org/) or if you're using a Mac, you can install it with Homebrew. 10 | 11 | Open a terminal window and run: 12 | 13 | ``` 14 | $ brew install node 15 | ``` 16 | 17 | ### Running the code 18 | 19 | To use, set the `token` variable to your legacy user token; 20 | _(Issue a legacy token from the [Slack API Help Center](https://api.slack.com/custom-integrations/legacy-tokens))_ 21 | 22 | then set the `channel` variable to the internal channel id of the channel you want to scrape. 23 | _(Find a channel's id with the test tool in the [Slack API](https://api.slack.com/methods/channels.list/test) documentation)_ 24 | 25 | Finally, in the console, `cd` into the directory and run: 26 | 27 | ``` 28 | $ node app.js 29 | ``` 30 | 31 | then open `data/history.html` in a browser. -------------------------------------------------------------------------------- /app.js: -------------------------------------------------------------------------------- 1 | 2 | // 3 | // Node.js Slack Channel Scraper 4 | // 5 | // 6 | // The following code pulls a full channel history from the Slack API, exports 7 | // the message and user data to .json files, then generates an HTML mockup of 8 | // the history in a similar style to Slack. 
//
// To use, set the `token` variable to your legacy user token;
// (Issue a legacy token at https://api.slack.com/custom-integrations/legacy-tokens)
//
// then set the `channel` variable to the internal channel id of the channel you want to scrape.
// (Find a channel's id with the Slack API at https://api.slack.com/methods/channels.list/test)
//
// Finally just run `node app.js` in the console and open `data/history.html`.
//

const https = require('https');
const fs = require('fs');

const token = 'INSERT_TOKEN_HERE';
const channel = 'INSERT_CHANNEL_ID_HERE';

// NOTE(review): Slack has been retiring `channels.history` and legacy tokens in favour of
// `conversations.history` and app tokens — confirm against the current Slack API
// documentation before relying on the endpoints used below.

// Shared state filled in by the download/cache steps and consumed by the formatting steps.
// `users` starts as an array of Slack user objects and becomes an id -> user map after formatUsers().
let users;
let messages;

// Accumulates pages of messages while getMessages() follows `has_more`.
let messagesAPIData = [];

// Slack control sequences: <@USERID> or <@USERID|label>, and <http(s)://url> or <http(s)://url|label>.
const USER_TAG_REGEX = /<@([^>|\s]+)(?:\|([^>]*))?>/g;
const URL_TAG_REGEX = /<(https?:\/\/[^>|\s]+)(?:\|([^>]*))?>/g;

const HTML_ESCAPES = {
  '&': '&amp;',
  '<': '&lt;',
  '>': '&gt;',
  '"': '&quot;',
  "'": '&#39;'
};

const HTML_HEAD = `<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<title>Slack Message History</title>
<style>
  body { margin: 0; padding: 16px; font-family: Lato, 'Helvetica Neue', Arial, sans-serif; font-size: 15px; color: #2c2d30; }
  .messages { max-width: 900px; margin: 0 auto; }
  .message { display: flex; padding: 8px 0; }
  .avatar { flex: 0 0 36px; width: 36px; height: 36px; margin-right: 10px; border-radius: 4px; background: #ddd; }
  .content { flex: 1; min-width: 0; }
  .name { font-weight: 900; }
  .time { font-size: 12px; color: #717274; }
  .text { white-space: pre-wrap; word-wrap: break-word; line-height: 1.4; }
  .mention { padding: 0 2px; border-radius: 3px; background: #e8f5fa; color: #0576b9; }
  a { color: #0576b9; }
</style>
</head>
<body>
<div class="messages">
`;

const HTML_FOOT = `
</div>
</body>
</html>
`;

/**
 * Escapes the characters that are significant in HTML text and attribute values.
 *
 * @param {*} value - Value to escape; it is converted with String().
 * @returns {string} The escaped text.
 */
function escapeHtml(value) {
  return String(value).replace(/[&<>"']/g, (ch) => HTML_ESCAPES[ch]);
}

/**
 * Rewrites the current terminal line with a progress message.
 * Does nothing when stdout is not a TTY, because `clearLine`/`cursorTo`
 * only exist on TTY streams (e.g. not when output is piped to a file).
 *
 * @param {string} text - Progress line to display.
 */
function showProgress(text) {
  if (!process.stdout.isTTY) {
    return;
  }

  process.stdout.clearLine(0);
  process.stdout.cursorTo(0);
  process.stdout.write(text);
}

/**
 * Performs a GET request against the Slack Web API and parses the JSON body.
 *
 * @param {string} url - Fully built request URL.
 * @returns {Promise<Object>} Resolves with the parsed response. Rejects on a network error,
 *   a non-200 status, a non-JSON content type, unparsable JSON, or a response with `ok: false`.
 */
function fetchJson(url) {
  return new Promise((resolve, reject) => {
    https.get(url, (res) => {
      const { statusCode } = res;
      const contentType = res.headers['content-type'];

      let error;
      if (statusCode !== 200) {
        error = new Error('Request Failed.\n' + `Status Code: ${statusCode}`);
      } else if (!/^application\/json/.test(contentType)) {
        error = new Error('Invalid content-type.\n' + `Expected application/json but received ${contentType}`);
      }
      if (error) {
        // consume response data to free up memory
        res.resume();
        reject(error);
        return;
      }

      res.setEncoding('utf8');
      let rawData = '';
      res.on('data', (chunk) => {
        rawData += chunk;
      });
      res.on('end', () => {
        let parsedData;
        try {
          parsedData = JSON.parse(rawData);
        } catch (e) {
          reject(e);
          return;
        }

        if (!parsedData || parsedData.ok === false) {
          const reason = (parsedData && parsedData.error) || 'unknown error';
          reject(new Error(`Slack API error: ${reason}`));
          return;
        }

        resolve(parsedData);
      });
    }).on('error', reject);
  });
}

/**
 * Serialises `value` as JSON and writes it to `file`.
 *
 * @param {string} file - Destination path.
 * @param {*} value - JSON-serialisable value.
 * @returns {Promise<void>} Rejects when the write fails.
 */
function writeJson(file, value) {
  return new Promise((resolve, reject) => {
    fs.writeFile(file, JSON.stringify(value), (err) => {
      if (err) {
        reject(err);
        return;
      }

      resolve();
    });
  });
}

/**
 * Reads a cached list from disk.
 *
 * The download steps store bare arrays, but a raw API response
 * (`{ messages: [...] }` / `{ members: [...] }`) is accepted as well.
 *
 * @param {string} file - Path of the cache file.
 * @param {string} key - Property holding the list when the file contains a raw API response.
 * @returns {Promise<Array|null>} The list, or `null` when the file is missing or holds no list.
 *   Rejects on any other read error or on invalid JSON.
 */
function readCachedList(file, key) {
  return new Promise((resolve, reject) => {
    fs.readFile(file, 'utf-8', (err, raw) => {
      if (err) {
        if (err.code === 'ENOENT') {
          resolve(null);
        } else {
          reject(err);
        }
        return;
      }

      let parsed;
      try {
        parsed = JSON.parse(raw);
      } catch (e) {
        reject(new Error(`Could not parse ${file}: ${e.message}`));
        return;
      }

      if (Array.isArray(parsed)) {
        resolve(parsed);
      } else if (parsed && Array.isArray(parsed[key])) {
        resolve(parsed[key]);
      } else {
        resolve(null);
      }
    });
  });
}

/**
 * Makes sure the `./data` output directory exists.
 *
 * @returns {Promise<void>} Rejects when the directory cannot be created.
 */
function createDirectory() {
  return new Promise((resolve, reject) => {
    fs.mkdir('./data', (err) => {
      // An already existing directory is the expected case on re-runs.
      if (err && err.code !== 'EEXIST') {
        reject(err);
        return;
      }

      resolve();
    });
  });
}

/**
 * Downloads the channel history page by page, stores it in `messages`
 * and writes it to `./data/messages.json`.
 *
 * @param {string} [timestamp] - `ts` of the oldest message fetched so far; the next page ends before it.
 * @returns {Promise<void>} Resolves once every page is downloaded and the file is written.
 */
function getMessages(timestamp) {
  const url = 'https://slack.com/api/channels.history' +
    `?token=${encodeURIComponent(token)}` +
    `&channel=${encodeURIComponent(channel)}` +
    `&pretty=1&latest=${encodeURIComponent(timestamp || '')}`;

  return fetchJson(url).then((page) => {
    if (!Array.isArray(page.messages)) {
      throw new Error('Slack API response did not contain a messages list');
    }

    messagesAPIData = messagesAPIData.concat(page.messages);

    // An empty page cannot supply a cursor, so stop there even if `has_more` is set.
    if (page.has_more && page.messages.length > 0) {
      return getMessages(page.messages[page.messages.length - 1].ts);
    }

    messages = messagesAPIData;

    return writeJson('./data/messages.json', messages).then(() => {
      console.log('> Successfully downloaded and wrote Messages to messages.json');
    });
  });
}

/**
 * Downloads the workspace user list, stores it in `users`
 * and writes it to `./data/users.json`.
 *
 * @returns {Promise<void>} Resolves once the file is written.
 */
function getUsers() {
  const url = `https://slack.com/api/users.list?token=${encodeURIComponent(token)}&pretty=1`;

  return fetchJson(url).then((body) => {
    if (!Array.isArray(body.members)) {
      throw new Error('Slack API response did not contain a members list');
    }

    users = body.members;

    return writeJson('./data/users.json', users).then(() => {
      console.log('> Successfully downloaded and wrote Users to users.json');
    });
  });
}

/**
 * Loads `messages` and `users` from the local cache files when both are available.
 *
 * @returns {Promise<boolean>} `true` when both lists were loaded, `false` when either is missing.
 */
function setUsersAndMessages() {
  return readCachedList('./data/messages.json', 'messages').then((cachedMessages) => {
    if (!cachedMessages) {
      return false;
    }

    return readCachedList('./data/users.json', 'members').then((cachedUsers) => {
      if (!cachedUsers) {
        return false;
      }

      messages = cachedMessages;
      users = cachedUsers;

      return true;
    });
  });
}

/**
 * Downloads the data from Slack unless usable local files were found.
 *
 * @param {boolean} exist - Result of setUsersAndMessages().
 * @returns {Promise<void>}
 */
function checkExistingFiles(exist) {
  if (exist) {
    console.log('> Local files found, proceeding with local data');
    return Promise.resolve();
  }

  console.log('> No local files found, retrieving data from Slack');
  messagesAPIData = [];

  return createDirectory()
    .then(() => getMessages())
    .then(() => getUsers());
}

/**
 * Replaces the `users` array with a lookup object keyed by user id.
 *
 * @returns {Promise<void>}
 */
function formatUsers() {
  // No prototype, so an id taken from message text can never hit an inherited property.
  const usersById = Object.create(null);

  users.forEach((user) => {
    usersById[user.id] = user;
  });

  users = usersById;

  return Promise.resolve();
}

/**
 * Renders one formatted message as an HTML fragment.
 *
 * @param {{user: Object, text: string, time: string}} message - Entry produced by createMessages().
 *   `text` is inserted as-is because it already contains the markup built by convertText().
 * @returns {string} HTML for the message row.
 */
function renderMessage(message) {
  const user = message.user || {};
  const displayName = escapeHtml(user.real_name || user.name || 'Unknown user');
  const avatar = user.profile && user.profile.image_72 ? user.profile.image_72 : '';
  const avatarHtml = avatar
    ? `<img class="avatar" src="${escapeHtml(avatar)}" alt="${displayName}">`
    : '<div class="avatar"></div>';

  return `
  <div class="message">
    ${avatarHtml}
    <div class="content">
      <div class="meta">
        <span class="name">${displayName}</span>&nbsp;&nbsp;<span class="time">${escapeHtml(message.time || '')}</span>
      </div>
      <div class="text">${message.text || ''}</div>
    </div>
  </div>`;
}

/**
 * Writes the formatted messages to `./data/history.html`.
 *
 * @param {Array<{user: Object, text: string, time: string}>} messages - Output of createMessages().
 * @returns {Promise<void>} Resolves after the file is fully flushed; rejects on a stream error.
 */
function createHTML(messages) {
  return new Promise((resolve, reject) => {
    process.stdout.write('\n> Messages successfully parsed\n');

    const writeStream = fs.createWriteStream('./data/history.html', { encoding: 'utf8' });

    writeStream.on('error', reject);
    writeStream.on('finish', () => {
      process.stdout.write('\n');
      console.log('> HTML file successfully written to history.html');

      resolve();
    });

    writeStream.write(HTML_HEAD);

    messages.forEach((message, position) => {
      showProgress(`> Parsing HTML: ${position + 1}`);
      writeStream.write(renderMessage(message));
    });

    // Ending the stream outside the loop also closes the document when there are no messages.
    writeStream.end(HTML_FOOT);
  });
}

/**
 * Converts every raw Slack message into `{ user, text, time }`.
 *
 * @returns {Promise<Array<{user: Object, text: string, time: string}>>}
 *   Formatted messages in the same order as `messages`.
 */
function createMessages() {
  let finished = 0;

  return Promise.all(messages.map((message) => {
    return Promise.resolve({ message: message })
      .then(findUsername)
      .then(convertText)
      .then(convertTime)
      .then((data) => {
        finished++;
        showProgress(`> Parsing messages: ${finished}`);

        return {
          user: data.user,
          text: data.text,
          time: data.time
        };
      });
  }));
}

/**
 * Attaches the author of a message as `data.user`.
 *
 * @param {{message: Object}} data - Work item holding the raw Slack message.
 * @returns {Promise<Object>} The same `data` object; `user` is always set,
 *   using a placeholder when the author cannot be found.
 */
function findUsername(data) {
  const message = data.message;
  let user;

  if (message.subtype === 'file_comment') {
    user = message.comment ? users[message.comment.user] : undefined;
  } else if (message.subtype === 'bot_message') {
    user = createBotUser(message);
  } else {
    user = users[message.user];
  }

  if (!user) {
    // Keep the original diagnostic, but fall back to a placeholder so rendering does not crash.
    console.log(message);

    const fallbackName = message.username || message.user || 'Unknown user';
    user = {
      id: message.user || message.bot_id,
      name: fallbackName,
      real_name: fallbackName,
      profile: {
        image_72: ''
      }
    };
  }

  data.user = user;

  return Promise.resolve(data);
}

/**
 * Builds a user-shaped object for a bot message.
 *
 * @param {Object} message - Raw Slack message with `bot_id`, `username` and optional `icons`.
 * @returns {{id: string, name: string, real_name: string, profile: {image_72: string}}}
 */
function createBotUser(message) {
  const image = message.icons ? message.icons.image_48 : '';

  return {
    id: message.bot_id,
    name: message.username,
    real_name: message.username,
    profile: {
      image_72: image
    }
  };
}

/**
 * Converts the message timestamp into a UTC date string stored as `data.time`.
 *
 * @param {{message: {ts: string}}} data - Work item; `ts` is epoch seconds with a fractional suffix.
 * @returns {Promise<Object>} The same `data` object.
 */
function convertTime(data) {
  const seconds = Math.floor(Number(data.message.ts));

  // toUTCString() is the standard name for the deprecated toGMTString().
  data.time = new Date(seconds * 1000).toUTCString();

  return Promise.resolve(data);
}

/**
 * Converts Slack markup in the message text into HTML stored as `data.text`.
 *
 * @param {{message: {text: (string|undefined)}}} data - Work item; a missing text is treated as ''.
 * @returns {Promise<Object>} The same `data` object.
 */
function convertText(data) {
  const text = typeof data.message.text === 'string' ? data.message.text : '';

  const message = {
    userTags: text.match(USER_TAG_REGEX),
    urlTags: text.match(URL_TAG_REGEX),
    text: text
  };

  return replaceUserTags(message)
    .then(replaceUrlTags)
    .then((converted) => {
      data.text = converted.text;
      return data;
    });
}

/**
 * Replaces every `<@USERID>` / `<@USERID|label>` tag in `message.text` with a mention span.
 * Unknown ids fall back to the tag's label, then to the raw id.
 *
 * @param {{text: string}} message - Object whose `text` is rewritten in place.
 * @returns {Promise<Object>} The same `message` object.
 */
function replaceUserTags(message) {
  // A replacer function avoids `$` patterns in names being interpreted by String.replace.
  message.text = message.text.replace(USER_TAG_REGEX, (tag, id, label) => {
    const user = users[id];
    const name = user && (user.real_name || user.name);
    // Profile names are unescaped; label/id come from message text, which Slack already escapes.
    const shown = name ? escapeHtml(name) : (label || id);

    return `<span class="mention">@${shown}</span>`;
  });

  return Promise.resolve(message);
}

/**
 * Replaces every `<http(s)://url>` / `<http(s)://url|label>` tag in `message.text` with an anchor.
 *
 * @param {{text: string}} message - Object whose `text` is rewritten in place.
 * @returns {Promise<Object>} The same `message` object.
 */
function replaceUrlTags(message) {
  message.text = message.text.replace(URL_TAG_REGEX, (tag, url, label) => {
    // `&`, `<` and `>` arrive pre-escaped in Slack text; only the quote can break the attribute.
    const href = url.replace(/"/g, '&quot;');

    return `<a href="${href}">${label || url}</a>`;
  });

  return Promise.resolve(message);
}

/**
 * Runs the whole pipeline: load or download the data, format it, write the HTML file.
 *
 * @returns {Promise<void>}
 */
function main() {
  return setUsersAndMessages()
    .then(checkExistingFiles)
    .then(formatUsers)
    .then(createMessages)
    .then(createHTML);
}

// Only start scraping when executed directly (`node app.js`), not when loaded by another module.
if (typeof module !== 'undefined' && require.main === module) {
  main().catch((err) => {
    console.error(`> Failed: ${err.message}`);
    process.exitCode = 1;
  });
}