├── MANIFEST.in ├── requirements.txt ├── README.md ├── .gitignore ├── LICENSE.md ├── setup.py └── bin └── slack-exporter /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include requirements.txt 2 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | requests==2.10.0 2 | slacker==0.9.16 3 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # slack-exporter 2 | Export data from Slack as a non-admin user. This script uses Slack's API to 3 | export history from each channel chunk by chunk. 4 | 5 | Note that the format of channel logs is JSON, not NDJSON. We should probably 6 | make it possible to output NDJSON using a command line option. There are many 7 | things that would be nice to have so feel free to contribute and submit pull 8 | requests. 9 | 10 | ## Usage 11 | Generate a test Slack API token [here](https://api.slack.com/web) and put it in 12 | a file called `./env` like this: 13 | ``` 14 | export SLACK_TOKEN=xoxp-123456... 15 | ``` 16 | Then every time you start your shell just `cd` into this directory, do `source 17 | ./env` and run the exporter with your desired options. 18 | 19 | ```sh 20 | ./slack-exporter --min-members 17 --date-start 2016-06-01 21 | ``` 22 | 23 | Find more options in `./slack-exporter -h`. 
24 | 25 | ## Development setup 26 | ```sh 27 | pyenv virtualenv 3.5.0 slack-exporter 28 | pyenv local slack-exporter 29 | ``` 30 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /data 2 | /env 3 | # Byte-compiled / optimized / DLL files 4 | __pycache__/ 5 | *.py[cod] 6 | *$py.class 7 | 8 | # C extensions 9 | *.so 10 | 11 | # Distribution / packaging 12 | .Python 13 | env/ 14 | build/ 15 | develop-eggs/ 16 | dist/ 17 | downloads/ 18 | eggs/ 19 | .eggs/ 20 | lib/ 21 | lib64/ 22 | parts/ 23 | sdist/ 24 | var/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .coverage 43 | .coverage.* 44 | .cache 45 | nosetests.xml 46 | coverage.xml 47 | *,cover 48 | .hypothesis/ 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | 57 | # Sphinx documentation 58 | docs/_build/ 59 | 60 | # PyBuilder 61 | target/ 62 | 63 | #Ipython Notebook 64 | .ipynb_checkpoints 65 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2016 Lee Archer 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons 
to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | import pip 4 | 5 | try: 6 | from setuptools import setup 7 | except ImportError: 8 | from distutils.core import setup 9 | 10 | links = [] 11 | requires = [] 12 | 13 | requirements = pip.req.parse_requirements( 14 | "requirements.txt", session=pip.download.PipSession()) 15 | 16 | for item in requirements: 17 | # we want to handle package names and also repo urls 18 | if getattr(item, "url", None): # older pip has url 19 | links.append(str(item.url)) 20 | 21 | if getattr(item, "link", None): # newer pip has link 22 | links.append(str(item.link)) 23 | 24 | if item.req: 25 | requires.append(str(item.req)) 26 | 27 | config = { 28 | "description": "Export data from Slack as a non-admin user", 29 | "author": "Lee Archer", 30 | "url": "http://blog.archer.onl/article/" + 31 | "export-all-slack-logs-as-a-non-admin-user/", 32 | 33 | "download_url": "https://github.com/lbn/slack-exporter", 34 | "author_email": "lee+github@archer.onl", 35 | "version": "0.0.1", 36 | "packages": [], 37 | "scripts": [ 38 | "bin/slack-exporter" 39 | ], 
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
File: slack-exporter
Author: Lee Archer
Email: lee+github@archer.onl
Github: https://github.com/lbn/slack-exporter
Description: Export data from Slack as a non-admin user
"""

import os
import sys
import json
import time
import argparse
from datetime import datetime, date

try:
    from slacker import Slacker
except ImportError:
    # Keep the module importable (and `-h` usable) without the dependency;
    # the error is raised when a SlackExporter is actually constructed.
    Slacker = None

BEGINNING_OF_TIME = "the beginning of time"

# Spellings accepted by parse_bool(); compared case-insensitively.
TRUE_WORDS = ("1", "true", "t", "yes", "y", "on")
FALSE_WORDS = ("", "0", "false", "f", "no", "n", "off")


class SlackExporter:
    """Download users, channels and channel history into a data directory.

    Files written:
        DATA_DIR/users.json           -- member list
        DATA_DIR/channels.json        -- channels that passed the filters
        DATA_DIR/logs/<channel>.json  -- message history, one file per channel
    """

    def __init__(self, token, data_dir):
        """Create the API client and make sure the output directories exist.

        Args:
            token: Slack API token handed to ``Slacker``.
            data_dir: Directory that receives all exported files.

        Raises:
            ImportError: If the ``slacker`` package is not installed.
        """
        if Slacker is None:
            raise ImportError(
                "The 'slacker' package is required; install it with "
                "'pip install -r requirements.txt'")

        self.slack = Slacker(token)
        self.data_dir = data_dir
        self.logs_dir = os.path.join(data_dir, "logs")
        # makedirs also creates data_dir (and any missing parents) and does
        # not fail when the directories already exist.
        os.makedirs(self.logs_dir, exist_ok=True)

    def users(self):
        """Write the workspace member list to DATA_DIR/users.json."""
        members = self.slack.users.list().body["members"]

        with open(os.path.join(self.data_dir, "users.json"), "w") as f:
            json.dump(members, f)

    def channels(self, date_from, date_to=None, min_members=None,
                 no_archived=True, channel_name=None):
        """Export matching channels and the history of each of them.

        Args:
            date_from: ``date`` marking the start of the history window.
            date_to: ``date`` marking the end of the window; ``None`` means
                "up to now".
            min_members: Skip channels with fewer members (``None``: no
                limit).
            no_archived: Skip archived channels when true.
            channel_name: Export only the channel with this exact name.
        """
        def matches(chan):
            if min_members is not None and chan["num_members"] < min_members:
                return False
            if no_archived and chan["is_archived"]:
                return False
            if channel_name is not None and chan["name"] != channel_name:
                return False
            return True

        selected = [chan for chan in
                    self.slack.channels.list().body["channels"]
                    if matches(chan)]

        # Export channel data without history
        with open(os.path.join(self.data_dir, "channels.json"), "w") as f:
            json.dump(selected, f)

        print("Processing", len(selected), "channels")

        for channel in selected:
            self.channel_history(channel, date_from, date_to)
            # Pause between channels -- presumably to stay under Slack's
            # rate limits.
            time.sleep(1)

    def channel_history(self, channel, date_from, date_to):
        """Fetch one channel's messages and write them to the logs directory.

        Args:
            channel: Channel dict; its ``"id"`` and ``"name"`` keys are used.
            date_from: ``date`` for the start of the window (local midnight).
            date_to: ``date`` for the end of the window (local midnight at
                the start of that day, so messages sent later on that day
                are outside the window), or ``None`` for the current time.
        """
        try:
            oldest = max(0, time.mktime(date_from.timetuple()))
        except (OverflowError, ValueError):
            # time.mktime cannot represent every date on every platform
            # (e.g. date.min); such a start simply means "from the beginning".
            oldest = 0

        if date_to is None:
            latest = time.time()
        else:
            latest = time.mktime(date_to.timetuple())

        # This method works by setting the latest date to the oldest date of
        # the current chunk of messages. The oldest message happens to be the
        # last message.
        # -------------     [---]
        # ----------   [---]---
        # -------[---]---   ---
        messages = []
        has_more = True
        while has_more:
            body = self.slack.channels.history(
                channel=channel["id"], latest=latest, oldest=oldest,
                count=100).body

            chunk = body["messages"]
            if not chunk:
                break

            messages += chunk
            # Use the exact timestamp string of the oldest message received.
            # Truncating it to whole seconds would skip older messages that
            # were sent within that same second.
            latest = chunk[-1]["ts"]
            has_more = body.get("has_more", False)

        chan_log = os.path.join(self.logs_dir, channel["name"] + ".json")
        with open(chan_log, "w") as f:
            json.dump(messages, f)


def valid_date(s):
    """argparse type: parse ``YYYY-MM-DD`` (or BEGINNING_OF_TIME) to a date.

    Raises:
        argparse.ArgumentTypeError: If *s* is not a valid date.
    """
    if s == BEGINNING_OF_TIME:
        return date.min
    try:
        return datetime.strptime(s, "%Y-%m-%d").date()
    except ValueError:
        msg = "Not a valid date: '{0}'.".format(s)
        raise argparse.ArgumentTypeError(msg)


def parse_bool(text):
    """argparse type: turn a textual flag value into a real boolean.

    ``type=bool`` cannot be used for this because ``bool("False")`` is
    ``True`` -- every non-empty string is truthy.

    Raises:
        argparse.ArgumentTypeError: If *text* is not a recognised spelling.
    """
    word = text.strip().lower()
    if word in TRUE_WORDS:
        return True
    if word in FALSE_WORDS:
        return False
    raise argparse.ArgumentTypeError(
        "Not a valid boolean: '{0}'.".format(text))


def build_parser():
    """Return the command line parser for the exporter."""
    parser = argparse.ArgumentParser(
        description="Export data from Slack as a non-admin user",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter
    )
    parser.add_argument(
        "--data",
        type=str,
        metavar="DATA_DIR",
        default="data",
        help="Data directory where channel, " +
             "user and message data will be saved")

    # Boolean options accept an explicit value ("--users False") or, given
    # without a value ("--archived"), mean True.
    parser.add_argument("--archived", type=parse_bool, nargs="?",
                        const=True, default=False,
                        help="Also download history for archived channels")

    parser.add_argument("--users", type=parse_bool, nargs="?",
                        const=True, default=True,
                        help="Save user data to DATA_DIR/users.json")

    parser.add_argument("--messages", type=parse_bool, nargs="?",
                        const=True, default=True,
                        help="Save channel data to DATA_DIR/channels.json " +
                             "and message history to DATA_DIR/logs")

    parser.add_argument("--min-members", type=int, default=0,
                        help="Only export channels which have at least " +
                             "this number of members")

    parser.add_argument("--date-start",
                        help="History start date - format YYYY-MM-DD ",
                        default=BEGINNING_OF_TIME, type=valid_date)

    parser.add_argument("--date-end",
                        help="History end date - format YYYY-MM-DD ",
                        required=False,
                        default=date.today().isoformat(),
                        type=valid_date)

    parser.add_argument("--channel", type=str, default=None,
                        help="Export this channel only")
    return parser


def main():
    """Command line entry point."""
    # Parse first so that `-h` works even when SLACK_TOKEN is not set.
    args = build_parser().parse_args()

    token = os.getenv("SLACK_TOKEN")
    if token is None:
        print("Please set the SLACK_TOKEN environment variable to " +
              "your Slack token", file=sys.stderr)
        sys.exit(1)

    # Validate before the exporter is built, so a bad date range does not
    # leave freshly created directories behind.
    if args.date_start > args.date_end:
        print("beep boop: user error")
        print("Are you sure the start and end dates are the right way round?")
        sys.exit(1)

    exporter = SlackExporter(token, args.data)

    if args.users:
        exporter.users()

    if args.messages:
        exporter.channels(args.date_start, args.date_end, args.min_members,
                          not args.archived, args.channel)


if __name__ == "__main__":
    main()