└── slack_history.py /slack_history.py: -------------------------------------------------------------------------------- 1 | from slacker import Slacker 2 | import json 3 | import argparse 4 | import os 5 | import shutil 6 | import copy 7 | from datetime import datetime 8 | 9 | # This script finds all channels, private channels and direct messages 10 | # that your user participates in, downloads the complete history for 11 | # those conversations and writes each conversation out to separate json files. 12 | # 13 | # This user-centric history gathering is nice because the official slack data exporter 14 | # only exports public channels. 15 | # 16 | # PS, this only works if your slack team has a paid account which allows for unlimited history. 17 | # 18 | # PPS, this use of the API is blessed by Slack. 19 | # https://get.slack.help/hc/en-us/articles/204897248 20 | # " If you want to export the contents of your own private groups and direct messages 21 | # please see our API documentation." 22 | # 23 | # get your slack user token at the bottom of this page 24 | # https://api.slack.com/web 25 | # 26 | # dependencies: 27 | # pip install slacker #https://github.com/os/slacker 28 | # 29 | # usage examples 30 | # python slack_history.py --token='123token' 31 | # python slack_history.py --token='123token' --dryRun 32 | # python slack_history.py --token='123token' --skipDirectMessages 33 | # python slack_history.py --token='123token' --skipDirectMessages --skipPrivateChannels 34 | 35 | 36 | # fetches the complete message history for a channel/group/im 37 | # 38 | # pageableObject could be: 39 | # slack.channels 40 | # slack.groups 41 | # slack.im 42 | # 43 | # channelId is the id of the channel/group/im you want to download history for. 
def getHistory(pageableObject, channelId, pageSize = 100):
    """Download the complete message history of one conversation.

    pageableObject -- API endpoint object exposing ``history(...)``; the
                      callers in this file pass slack.channels, slack.groups
                      or slack.im.
    channelId      -- id of the channel/group/im to download.
    pageSize       -- number of messages requested per API call.

    Returns the concatenated 'messages' lists of every page, in the order the
    API delivered them.  NOTE(review): the paging below uses the last message
    of a page as the next ``latest`` bound, which presumably means the API
    answers newest-first -- confirm against the Slack API reference.
    """
    messages = []
    lastTimestamp = None

    while True:
        response = pageableObject.history(
            channel = channelId,
            latest = lastTimestamp,
            oldest = 0,
            count = pageSize
        ).body

        page = response['messages']
        messages.extend(page)

        # An empty page cannot move the cursor forward; stopping here avoids
        # an IndexError (nothing fetched yet) or an endless loop that keeps
        # requesting the same 'latest' value.
        if not response.get('has_more') or not page:
            break
        lastTimestamp = messages[-1]['ts']  # -1 means last element in a list
    return messages


def mkdir(directory):
    """Create ``directory`` (including parents) unless it already exists.

    Raises OSError when the directory cannot be created and is not already
    present (e.g. a regular file occupies the path).
    """
    try:
        os.makedirs(directory)
    except OSError:
        # Either it already exists (fine, possibly created concurrently) or
        # creation really failed -- only the latter is an error.
        if not os.path.isdir(directory):
            raise


def parseTimeStamp( timeStamp ):
    """Create a datetime object from a slack timestamp ('ts') string.

    Only the part in front of the '.' is used, so the result has whole-second
    resolution.  A string without a '.' is treated as plain seconds.

    Returns a naive datetime expressing the time in UTC.
    Raises ValueError if the string contains more than one '.' or its leading
    part is not a number.
    """
    # Local import: the file only imports ``datetime`` from the datetime
    # module.  Epoch arithmetic replaces datetime.utcfromtimestamp(), which
    # is deprecated in recent Python versions, and yields the same value.
    from datetime import timedelta

    parts = timeStamp.split('.')
    if len( parts ) > 2:
        raise ValueError( 'Invalid time stamp' )
    return datetime(1970, 1, 1) + timedelta( seconds = float(parts[0]) )


def channelRename( oldRoomName, newRoomName ):
    """Move channel files from the old directory to one with the new name.

    Does nothing when ``oldRoomName`` is not an existing directory or when
    both names refer to the same directory.  A file that already exists in
    the new directory under the same name is replaced by the moved one.
    The old directory is removed afterwards.
    """
    # check if any files need to be moved
    if not os.path.isdir( oldRoomName ):
        return
    # Moving a directory's content onto itself would fail and then delete
    # nothing useful; treat it as "already renamed".
    if os.path.abspath( oldRoomName ) == os.path.abspath( newRoomName ):
        return

    mkdir( newRoomName )
    for fileName in os.listdir( oldRoomName ):
        # Give shutil.move the full target path: handing it only the target
        # directory makes it raise when a same-named file is already there.
        shutil.move( os.path.join( oldRoomName, fileName ),
                     os.path.join( newRoomName, fileName ) )
    os.rmdir( oldRoomName )


def writeMessageFile( fileName, messages ):
    """Write ``messages`` as indented JSON to ``fileName``.

    The parent directory is created when missing.  An existing file is
    overwritten.
    """
    directory = os.path.dirname(fileName)

    # A bare file name has no directory part; os.makedirs('') would fail.
    if directory:
        mkdir( directory )

    with open(fileName, 'w') as outFile:
        json.dump( messages, outFile, indent=4)


def _flushDay( parentDir, roomDir, fileDate, dayMessages ):
    """Write one day's messages to <parentDir>/<roomDir>/<fileDate>.json."""
    # Nothing collected yet (start of the loop or an empty history): do not
    # create a meaningless '.json' file holding an empty list.
    if not dayMessages:
        return
    writeMessageFile( os.path.join( parentDir, roomDir, fileDate + '.json' ), dayMessages )


def parseMessages( parentDir, roomDir, messages, roomType ):
    """Split ``messages`` into one JSON file per calendar day (UTC).

    parentDir -- top-level output directory ("channel", "private_channels"...)
    roomDir   -- directory name of the conversation inside ``parentDir``
    messages  -- list of message dicts, each carrying a 'ts' string
    roomType  -- 'channel', 'group' or 'im'; for anything but 'im' a message
                 whose 'subtype' is '<roomType>_name' is treated as a rename:
                 the files of message['old_name'] are moved to
                 message['name'] and later files are written there.

    Messages are processed in the given order and a file is written whenever
    the date changes, so messages of one day are expected to be adjacent.
    """
    nameChangeFlag = roomType + "_name"

    currentFileDate = ''
    currentMessages = []
    for message in messages:
        # first determine the date of the next message
        fileDate = '{:%Y-%m-%d}'.format( parseTimeStamp( message['ts'] ) )

        # if it's on a different day, write out the previous day's messages
        if fileDate != currentFileDate:
            _flushDay( parentDir, roomDir, currentFileDate, currentMessages )
            currentFileDate = fileDate
            currentMessages = []

        # check if current message is a name change
        # dms won't have name change events
        if roomType != "im" and message.get('subtype') == nameChangeFlag:
            roomDir = message['name']
            channelRename( os.path.join( parentDir, message['old_name'] ),
                           os.path.join( parentDir, roomDir ) )

        currentMessages.append( message )

    # write whatever is left for the last day
    _flushDay( parentDir, roomDir, currentFileDate, currentMessages )


def _exportConversation( pageableObject, conversationId, parentDir, roomDir, roomType ):
    """Fetch one conversation's history and write it below parentDir/roomDir."""
    mkdir( os.path.join( parentDir, roomDir ) )
    messages = getHistory( pageableObject, conversationId )
    parseMessages( parentDir, roomDir, messages, roomType )


def getChannels(slack, dryRun):
    """Fetch and write history for all public channels.

    Always prints the channel names; when ``dryRun`` is true nothing is
    fetched or written beyond that listing.  Output goes to ./channel/<name>/.
    """
    channels = slack.channels.list().body['channels']

    print("\nfound channels: ")
    for channel in channels:
        print(channel['name'])

    if dryRun:
        return

    parentDir = "channel"
    mkdir(parentDir)
    for channel in channels:
        print("getting history for channel {0}".format(channel['name']))
        _exportConversation( slack.channels, channel['id'], parentDir, channel['name'], 'channel' )


def dumpChannelFile( slack ):
    """Write channels.json listing public channels plus private channels.

    Every private channel (group) is converted into a channel-shaped entry:
    it starts as a copy of the first public channel (so it carries the same
    set of keys) and the group's own values are written over it.  Without
    any public channel the entry consists of the overwritten keys only.
    The file is overwritten on each run.
    """
    print("Making channels file")
    channels = slack.channels.list().body['channels']

    # have to convert private channels to channels to be read in properly
    groups = slack.groups.list().body['groups']

    # Taken once up front; an empty channel list must not raise IndexError.
    template = channels[0] if channels else {}
    for group in groups:
        new_channel = copy.copy(template)
        new_channel.update({
            'id': group['id'],
            'name': group['name'],
            'created': group['created'],
            'creator': group['creator'],
            'is_archived': group['is_archived'],
            'is_channel': True,
            'is_general': False,
            'is_member': True,
            'members': group['members'],
            'num_members': len(group['members']),
            'purpose': group['purpose'],
            'topic': group['topic'],
        })
        channels.append( new_channel )

    # We will be overwriting this file on each run.
    with open('channels.json', 'w') as outFile:
        json.dump( channels, outFile, indent=4)


def _dmDisplayName( dm, userIdNameMap ):
    """Return the partner's user name, or '<id> (name unknown)' if unmapped."""
    return userIdNameMap.get(dm['user'], dm['user'] + " (name unknown)")


def getDirectMessages(slack, ownerId, userIdNameMap, dryRun):
    """Fetch and write history for all direct message conversations.

    Direct messages are also known as IMs in the slack API.

    ownerId       -- accepted for interface compatibility; not used here.
    userIdNameMap -- dict userId -> userName used to name the directories.
    dryRun        -- when true only the conversation partners are listed.

    Output goes to ./direct_message/<user name>/.
    """
    dms = slack.im.list().body['ims']

    print("\nfound direct messages (1:1) with the following users:")
    for dm in dms:
        print(_dmDisplayName( dm, userIdNameMap ))

    if dryRun:
        return

    parentDir = "direct_message"
    mkdir(parentDir)
    for dm in dms:
        # note: double check naming of dm directory
        name = _dmDisplayName( dm, userIdNameMap )
        print("getting history for direct messages with {0}".format(name))
        _exportConversation( slack.im, dm['id'], parentDir, name, "im" )


def getPrivateChannels(slack, dryRun):
    """Fetch and write history for all private channels.

    Private channels are also known as groups in the slack API.  Always
    prints name and member count; when ``dryRun`` is true nothing else
    happens.  Output goes to ./private_channels/<name>/.
    """
    groups = slack.groups.list().body['groups']

    print("\nfound private channels:")
    for group in groups:
        print("{0}: ({1} members)".format(group['name'], len(group['members'])))

    if dryRun:
        return

    parentDir = "private_channels"
    mkdir(parentDir)
    for group in groups:
        print("getting history for private channel {0} with id {1}".format(group['name'], group['id']))
        _exportConversation( slack.groups, group['id'], parentDir, group['name'], 'group' )


def getUserMap(slack):
    """Fetch all users and return a dict mapping userId -> userName."""
    # get all users in the slack organization
    users = slack.users.list().body['members']
    userIdNameMap = dict( (user['id'], user['name']) for user in users )
    print("found {0} users ".format(len(users)))
    return userIdNameMap


def dumpUserFile(slack):
    """Store the json of all user info in users.json."""
    # write to user file, any existing file needs to be overwritten.
    with open( "users.json", 'w') as userFile:
        json.dump( slack.users.list().body['members'], userFile, indent=4 )


def doTestAuth(slack):
    """Check that the authentication token works and report who we are.

    Prints the team and user name taken from the auth test response and
    returns that response body (a dict) unchanged.
    """
    testAuth = slack.auth.test().body
    teamName = testAuth['team']
    currentUser = testAuth['user']
    print("Successfully authenticated for team {0} and user {1} ".format(teamName, currentUser))
    return testAuth


def main():
    """Parse the command line and export the requested conversations."""
    parser = argparse.ArgumentParser(description='download slack history')

    # Without a token nothing below can work, so let argparse report it.
    parser.add_argument('--token', required=True, help="an api token for a slack user")

    parser.add_argument(
        '--dryRun',
        action='store_true',
        default=False,
        help="if dryRun is true, don't fetch/write history only get channel names")

    parser.add_argument(
        '--skipPrivateChannels',
        action='store_true',
        default=False,
        help="skip fetching history for private channels")

    parser.add_argument(
        '--skipChannels',
        action='store_true',
        default=False,
        help="skip fetching history for channels")

    parser.add_argument(
        '--skipDirectMessages',
        action='store_true',
        default=False,
        help="skip fetching history for directMessages")

    args = parser.parse_args()

    slack = Slacker(args.token)

    testAuth = doTestAuth(slack)

    userIdNameMap = getUserMap(slack)

    dryRun = args.dryRun

    if not dryRun:
        # write channel and user jsons
        dumpUserFile(slack)
        dumpChannelFile(slack)

    if not args.skipChannels:
        getChannels(slack, dryRun)

    if not args.skipPrivateChannels:
        getPrivateChannels(slack, dryRun)

    if not args.skipDirectMessages:
        getDirectMessages(slack, testAuth['user_id'], userIdNameMap, dryRun)


if __name__ == "__main__":
    main()
--------------------------------------------------------------------------------