├── .gitignore ├── README.md └── pastefind.py /.gitignore: -------------------------------------------------------------------------------- 1 | *.py[co] 2 | 3 | # Packages 4 | *.egg 5 | *.egg-info 6 | dist 7 | build 8 | eggs 9 | parts 10 | bin 11 | var 12 | sdist 13 | develop-eggs 14 | .installed.cfg 15 | 16 | # Installer logs 17 | pip-log.txt 18 | 19 | # Unit test / coverage reports 20 | .coverage 21 | .tox 22 | 23 | #Translations 24 | *.mo 25 | 26 | #Mr Developer 27 | .mr.developer.cfg 28 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | pastebin-find 2 | ============= 3 | 4 | Python script to monitor new Pastebin pastes for a provided search term 5 | 6 | Usage: python pastefind.py search_regex 7 | 8 | Edit the "time_between" variable to change the time between requests. Currently it is at 7s. If the value is too low, pastebin may block your IP address for repeated queries. If it is too high, you may miss a paste since the program only scans the top 10 pastes on the recently added page. 9 | 10 | The search term should be a regular expression, passed in quotes. For example, to search for formatted phone numbers, use '\(?(\d{3})\)?-?(\d{3})-(\d{4})' as the search input. 
#!/usr/bin/python
# Python file to monitor pastebin for pastes containing the passed regex
#
# Usage: python pastefind.py search_regex
# Runs unchanged on Python 2 and Python 3.

import sys
import time
import urllib
import re
from contextlib import closing

try:
    from urllib.request import urlopen  # Python 3
except ImportError:
    from urllib import urlopen  # Python 2

# User-defined variables
time_between = 7  # Seconds between iterations (not including time used to fetch pages - setting below 5s may cause a pastebin IP block, too high may miss pastes)
error_on_cl_args = "Please provide a single regex search via the command line"  # Error to display if improper command line arguments are provided

ARCHIVE_URL = "http://pastebin.com/archive"      # Recently posted pastes page
RAW_URL_PREFIX = "http://pastebin.com/raw.php?i="  # Raw paste text, followed by the paste id
MAX_PASTES = 10                                  # Only the first N pastes on the archive page are scanned

# NOTE(review): the marker pattern and the fixed [72:80] slice used below are
# carried over unchanged from the original script. Both depend on the exact
# layout of pastebin's archive HTML - confirm against the live page.
PASTE_LINE_RE = re.compile(r'\"\".*')


def fetch_text(url):
    """Return the body of *url* as text.

    The connection is always closed, even when reading fails. Network
    failures propagate as IOError (OSError on Python 3).
    """
    with closing(urlopen(url)) as response:
        body = response.read()
    # Python 3 returns bytes; Python 2 already returns str.
    if not isinstance(body, str):
        body = body.decode("utf-8", "replace")
    return body


def extract_paste_ids(html, limit=MAX_PASTES):
    """Return up to *limit* paste ids found in the archive page *html*.

    A line is treated as a paste entry when it matches PASTE_LINE_RE; the id
    is taken from columns 72-79 of that line. Lines too short to yield an id
    are skipped.
    """
    paste_ids = []
    for line in html.split('\n'):
        if len(paste_ids) >= limit:
            break
        if not PASTE_LINE_RE.search(line):
            continue
        paste_id = line[72:80]
        if paste_id:
            paste_ids.append(paste_id)
    return paste_ids


def scan_once(pattern, fetch=fetch_text):
    """Scan the most recent pastes once and report those matching *pattern*.

    pattern -- compiled regex to look for in each paste's raw text
    fetch   -- callable mapping a URL to its text (replaceable for testing)

    Prints a "FOUND ..." line for every hit and returns the list of matching
    raw-paste URLs. Errors raised by *fetch* propagate to the caller.
    """
    hits = []
    for paste_id in extract_paste_ids(fetch(ARCHIVE_URL)):
        raw_url = RAW_URL_PREFIX + paste_id
        if pattern.search(fetch(raw_url)):
            print("FOUND " + pattern.pattern + " in " + raw_url)
            hits.append(raw_url)
    return hits


def parse_search_term(argv):
    """Return the single regex given on the command line.

    Prints the usage error and exits with status 1 unless exactly one
    argument follows the program name.
    """
    if len(argv) != 2:
        print(error_on_cl_args)
        sys.exit(1)
    return argv[1]


def main(argv=None):
    """Monitor pastebin forever, rescanning every `time_between` seconds.

    argv -- argument vector including the program name (defaults to sys.argv)

    Exits with status 1 on bad arguments, an invalid regex, or an unexpected
    error. Network errors are reported and the scan is retried on the next
    iteration.
    """
    if argv is None:
        argv = sys.argv
    search_term = parse_search_term(argv)

    # Compile once up front: a bad pattern is reported immediately and the
    # regex is not re-parsed for every paste.
    try:
        pattern = re.compile(search_term)
    except re.error as exc:
        print("Invalid regex " + repr(search_term) + ": " + str(exc))
        sys.exit(1)

    iteration = 1
    while True:
        print("Scanning pastebin - iteration " + str(iteration) + "...")
        try:
            scan_once(pattern)
        except IOError:
            print("Network error - are you connected?")
        except Exception as exc:
            # Not a bare except: Ctrl-C and SystemExit must not be reported
            # as fatal errors, and the real cause should stay visible.
            print("Fatal error! Exiting.")
            sys.stderr.write("%s: %s\n" % (type(exc).__name__, exc))
            sys.exit(1)
        iteration += 1
        time.sleep(time_between)


if __name__ == "__main__":
    try:
        main(sys.argv)
    except KeyboardInterrupt:
        # Ctrl-C is the normal way to stop the monitor; leave quietly.
        sys.exit(0)