"""Scraper Bot: a Discord bot that exports a channel's message history as CSV.

Repository layout: ``README.md`` and ``bot.py`` (this module).

README notice
-------------
This project is being discontinued. Apparently, the Discord API policies, as
of July 1st, 2020, prohibit the utilization of the API for scraping any data.
I hadn't realized this until my heroku account that was hosting the bot got
suspended, which prompted me to dig into the policies of both platforms. The
bot is no longer being hosted and has been removed from all servers.
"""

import logging
import os

import discord
import pandas as pd

logging.basicConfig(level=logging.INFO)

intents = discord.Intents.default()
client = discord.Client(intents=intents)
guild = discord.Guild  # Unused by this module; kept so the module namespace is unchanged.

COMMAND_PREFIX = '_'
DEFAULT_LIMIT = 100      # Messages scraped when the user gives no number.
HISTORY_PADDING = 1000   # Extra history fetched to make up for skipped messages.
EMBED_COLOUR = 0x1a7794
CSV_COLUMNS = ['content', 'time', 'author']


def _help_embed():
    """Build the embed describing the ``_scan`` command format."""
    return discord.Embed(
        title="Command Format",
        description="""`_scan <channel> <limit>`\n\n`<channel>` : **the channel you wish to scan**\n`<limit>` : **the number of messages you wish to scan**\n\n*The order of the parameters does not matter.*""",
        colour=EMBED_COLOUR)


def _is_scan_command(content):
    """Return True when *content* is an invocation of the ``_scan`` command.

    Empty and whitespace-only contents are not commands.
    """
    tokens = content.split()
    return bool(tokens) and tokens[0] == '_scan'


def _parse_limit(parameters):
    """Return the number of messages requested in *parameters*.

    Tokens starting with ``<`` (mentions are enveloped by ``<>``) are skipped.
    When several numbers are given the last one wins; when none is given
    ``DEFAULT_LIMIT`` is returned.

    Raises:
        ValueError: if a token is not an integer or is smaller than 1.
    """
    limit = DEFAULT_LIMIT
    for parameter in parameters:
        if parameter.startswith('<'):
            continue
        limit = int(parameter)
        if limit < 1:
            # A non-positive limit would make the "collected == limit" stop
            # condition unreachable, so reject it up front.
            raise ValueError(f"limit must be positive, got {limit}")
    return limit


async def _collect_rows(channel, limit):
    """Read up to *limit* user messages from *channel*, newest history first.

    Messages sent by this bot and ``_scan`` invocations are skipped, which is
    why more than *limit* messages are requested from the history.
    """
    rows = []
    async for msg in channel.history(limit=limit + HISTORY_PADDING):
        if msg.author == client.user or _is_scan_command(msg.content):
            continue
        rows.append({'content': msg.content,
                     'time': msg.created_at,
                     'author': msg.author.name})
        if len(rows) == limit:
            break
    return rows


async def _handle_scan(message, parameters):
    """Run the ``_scan`` command and send the resulting CSV to the author."""
    if 'help' in parameters:
        await message.channel.send(embed=_help_embed())
        return

    try:
        limit = _parse_limit(parameters)
    except ValueError:
        # Unusable parameter: show the expected format instead of crashing.
        await message.channel.send(embed=_help_embed())
        return

    # Acquiring the channel via the bot command
    if message.channel_mentions:
        channel = message.channel_mentions[0]
    else:
        channel = message.channel

    answer = discord.Embed(
        title="Creating your Message History Dataframe",
        description="Please Wait. The data will be sent to you privately once it's finished.",
        colour=EMBED_COLOUR)
    await message.channel.send(embed=answer)

    # Build the frame once from a list; appending row by row is quadratic and
    # DataFrame.append no longer exists in pandas 2.x.
    rows = await _collect_rows(channel, limit)
    data = pd.DataFrame(rows, columns=CSV_COLUMNS)

    # The command may arrive outside a server, where there is no guild.
    # NOTE(review): assumes private channels expose no usable ``guild`` - confirm.
    source_guild = getattr(channel, 'guild', None)
    guild_id = source_guild.id if source_guild is not None else 'dm'
    server_name = message.guild.name if message.guild is not None else 'Direct Message'
    channel_name = getattr(channel, 'name', str(channel))

    file_location = f"{guild_id}_{channel.id}.csv"  # Determining file name and location

    answer = discord.Embed(
        title="Here is your .CSV File",
        description=f"""It might have taken a while, but here is what you asked for.\n\n`Server` : **{server_name}**\n`Channel` : **{channel_name}**\n`Messages Read` : **{len(data)}**""",
        colour=EMBED_COLOUR)

    try:
        data.to_csv(file_location)  # Saving the file as a .csv via pandas
        await message.author.send(embed=answer)
        await message.author.send(file=discord.File(file_location, filename='data.csv'))
    finally:
        # Always delete the temporary file, even when the DM cannot be sent.
        if os.path.exists(file_location):
            os.remove(file_location)


@client.event
async def on_ready():
    """Announce the login on stdout and advertise the help command."""
    print(f'We have logged in as {client.user}')
    await client.change_presence(activity=discord.Game('_scan help'))


@client.event
async def on_message(message):
    """Dispatch bot commands, i.e. messages starting with ``COMMAND_PREFIX``.

    Messages written by the bot itself are ignored.
    """
    if message.author == client.user:
        return
    if not message.content.startswith(COMMAND_PREFIX):
        return

    # The content starts with the prefix, so there is at least one token.
    tokens = message.content.split()
    cmd = tokens[0][len(COMMAND_PREFIX):]  # Strip only the leading prefix.
    parameters = tokens[1:]

    # Bot Commands

    if cmd == 'scan':
        await _handle_scan(message, parameters)


if __name__ == '__main__':
    # Prefer a token from the environment so it never has to be committed.
    client.run(os.environ.get('DISCORD_TOKEN', 'your-token-here'))