├── .gitignore ├── LICENSE ├── README.md ├── pii.py ├── pii_id.py ├── requirements.txt └── sensors ├── address.json ├── birthday.json ├── cpf.json ├── cpf.py ├── email.json ├── fullname.json ├── imei.json ├── latlong.json ├── phonenumber.json ├── rg.json ├── rg.py └── ssn.json /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 Caio Lüders 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # PII Identifier 2 | 3 | ![Screenshot](https://i.imgur.com/jbS1RlI.png) 4 | 5 | **Work on progress, not usable** 6 | 7 | This is a Burp Suite extension that idenfies PII data. 8 | 9 | If you have any PII idea, or feature request please open an Issue. 10 | 11 | # TODO 12 | 13 | - [x] Regexes find PII 14 | - [x] Tab 15 | - [x] Configuration checks 16 | 17 | ### PII 18 | 19 | - [x] CPF 20 | - [x] Birthday 21 | - [x] Full Name 22 | - [X] Email 23 | - [x] RG 24 | - [X] Address 25 | - [X] Phone number 26 | - [ ] SSN 27 | - [ ] IMEI ? 28 | - [ ] Lat / Long 29 | 30 | -------------------------------------------------------------------------------- /pii.py: -------------------------------------------------------------------------------- 1 | import re 2 | from burp import IBurpExtender 3 | from burp import IScannerCheck 4 | from burp import IScanIssue 5 | from burp import ITab 6 | from array import array 7 | from java.io import PrintWriter 8 | from javax.swing import JPanel, JTextField, JButton, JLabel, BoxLayout, JPasswordField, JCheckBox, JRadioButton, ButtonGroup, JSlider 9 | from java.awt import GridLayout, BorderLayout 10 | 11 | 12 | EXT_NAME = "PII Notifier" 13 | 14 | class BurpExtender(IBurpExtender, IScannerCheck, ITab): 15 | def __init__(self): 16 | self.cpfcheck = True 17 | self.fullnamecheck = True 18 | self.rgcheck = True 19 | self.birthdaycheck = True 20 | self.requestcheck = True 21 | self.responsecheck = True 22 | 23 | def registerExtenderCallbacks( self, callbacks):# your extension code here 24 | callbacks.setExtensionName(EXT_NAME) 25 | self._callbacks = callbacks 26 | self._helpers = callbacks.getHelpers() 27 | 28 | 29 | self._stdout = PrintWriter(callbacks.getStdout(), True) 30 | 31 | callbacks.registerScannerCheck(self) 32 | callbacks.addSuiteTab(self) 33 | 34 | 35 | def _get_matches(self, response) : 36 | matches = [] 37 | start = 0 38 | reslen = len(response) 39 | #matchlen = len(match) 40 | 41 | self._stdout.println(self._helpers.bytesToString(response)[0:20]) 42 | return False 43 | 44 | def doPassiveScan(self, baseRequestResponse) : 45 | matches = self._get_matches( baseRequestResponse.getResponse() ) 46 | 47 | return None 48 | 49 | 50 | def getUiComponent(self) : 51 | 52 | cpfcheck = self._callbacks.loadExtensionSetting("cpfcheck") 53 | rgcheck = self._callbacks.loadExtensionSetting("rgcheck") 54 | fullnamecheck = self._callbacks.loadExtensionSetting("fullnamecheck") 55 | birthdaycheck = self._callbacks.loadExtensionSetting("birthdaycheck") 56 | requestcheck = self._callbacks.loadExtensionSetting("requestcheck") 57 | responsecheck = self._callbacks.loadExtensionSetting("responsecheck") 58 | 59 | if cpfcheck : 60 | self.cpfcheck = (True if cpfcheck == "True" else False ) 61 | if responsecheck : 62 | self.responsecheck = (True if responsecheck == "True" else False ) 63 | if requestcheck : 64 | self.requestcheck = (True if requestcheck == "True" else False ) 65 | if rgcheck : 66 | self.rgcheck = (True if rgcheck == "True" else False ) 67 | if fullnamecheck : 68 | self.fullnamecheck = (True if fullnamecheck == "True" else False ) 69 | if birthdaycheck : 70 | self.birthdaycheck = (True if birthdaycheck == "True" else False ) 71 | 72 | self.panel = JPanel() 73 | self.main = JPanel() 74 | 75 | self.main.setLayout(GridLayout(0,2)) 76 | 77 | self.pii_types = JPanel() 78 | self.main.add(self.pii_types) 79 | self.pii_types.add(JLabel('PII Types')) 80 | self.cpf_checkbox = JCheckBox("CPF",self.cpfcheck) 81 | self.fullname_checkbox = JCheckBox("Full Name", self.fullnamecheck) 82 | self.rg_checkbox = JCheckBox("RG", self.rgcheck) 83 | self.birthday_checkbox = JCheckBox("Birthday", self.birthdaycheck) 84 | self.pii_types.add(self.fullname_checkbox) 85 | self.pii_types.add(self.birthday_checkbox) 86 | self.pii_types.add(self.rg_checkbox) 87 | self.pii_types.add(self.cpf_checkbox) 88 | 89 | self.check_panel = JPanel() 90 | self.main.add(self.check_panel) 91 | self.check_panel.add(JLabel("Checks")) 92 | self.request_checkbox = JCheckBox("Request",self.requestcheck) 93 | self.response_checkbox = JCheckBox("Response",self.responsecheck) 94 | self.check_panel.add(self.request_checkbox) 95 | self.check_panel.add(self.response_checkbox) 96 | 97 | self.slider_panel = JPanel() 98 | self.main.add(self.slider_panel) 99 | self.slider_panel.add(JLabel("Threshold")) 100 | self.threshold_slider = JSlider(JSlider.HORIZONTAL,0,100,50) 101 | self.slider_panel.add(self.threshold_slider) 102 | 103 | self.buttons = JPanel() 104 | self.main.add(self.buttons,BorderLayout.CENTER) 105 | 106 | self.save_button = JButton("Save", actionPerformed = self.savePressed) 107 | self.buttons.add(self.save_button) 108 | 109 | self.panel.add(self.main) 110 | 111 | return self.panel 112 | 113 | def savePressed(self, event) : 114 | self._callbacks.saveExtensionSetting("cpfcheck",str(self.cpf_checkbox.isSelected())) 115 | self._callbacks.saveExtensionSetting("rgcheck",str(self.rg_checkbox.isSelected())) 116 | self._callbacks.saveExtensionSetting("fullnamecheck",str(self.fullname_checkbox.isSelected())) 117 | self._callbacks.saveExtensionSetting("birthdaycheck",str(self.birthday_checkbox.isSelected())) 118 | self._callbacks.saveExtensionSetting("requestcheck",str(self.request_checkbox.isSelected())) 119 | self._callbacks.saveExtensionSetting("responsecheck",str(self.response_checkbox.isSelected())) 120 | return 121 | 122 | 123 | 124 | def getTabCaption(self) : 125 | return EXT_NAME 126 | 127 | def consolidateDuplicateIssues(self, existingIssue, newIssue): 128 | if existingIssue.getIssueName() == newIssue.getIssueName(): 129 | return -1 130 | 131 | return 0 132 | 133 | def extensionUnloaded(self): 134 | self._stdout.println("Extension was unloaded") 135 | 136 | class CustomScanIssue (IScanIssue): 137 | def __init__(self, httpService, url, httpMessages, name, detail, severity): 138 | self._httpService = httpService 139 | self._url = url 140 | self._httpMessages = httpMessages 141 | self._name = name 142 | self._detail = detail 143 | self._severity = severity 144 | 145 | def getUrl(self): 146 | return self._url 147 | 148 | def getIssueName(self): 149 | return self._name 150 | 151 | def getIssueType(self): 152 | return 0 153 | 154 | def getSeverity(self): 155 | return self._severity 156 | 157 | def getConfidence(self): 158 | return "Certain" 159 | 160 | def getIssueBackground(self): 161 | pass 162 | 163 | def getRemediationBackground(self): 164 | pass 165 | 166 | def getIssueDetail(self): 167 | return self._detail 168 | 169 | def getRemediationDetail(self): 170 | pass 171 | 172 | def getHttpMessages(self): 173 | return self._httpMessages 174 | 175 | def getHttpService(self): 176 | return self._httpService 177 | 178 | -------------------------------------------------------------------------------- /pii_id.py: -------------------------------------------------------------------------------- 1 | import re, unidecode, os, importlib, json 2 | from operator import itemgetter 3 | 4 | 5 | def findall(p, s): 6 | '''Yields all the positions of 7 | the pattern p in the string s.''' 8 | i = s.find(p) 9 | while i != -1: 10 | yield i 11 | i = s.find(p, i+1) 12 | 13 | def calculate_distance(data,data_decoded,x,keywords,threshold) : 14 | '''Calculate distance between two words on a string, 15 | prioritizes nearest find, ignores unicode.''' 16 | results = [] 17 | 18 | i_x = data.find(x) 19 | relevant_data = data_decoded[:i_x] 20 | 21 | for k in keywords : 22 | try : 23 | i_k = min([ i for i in findall(k,relevant_data)], key=lambda x:abs(x-i_x)) 24 | except : 25 | continue 26 | 27 | i_k += len(k) 28 | percent_distance = 100-(abs(i_x-i_k)*100 / len(data)) 29 | if threshold > percent_distance : 30 | continue 31 | results.append( [ x, k , percent_distance ] ) 32 | 33 | return sorted(results, key=itemgetter(2)) 34 | 35 | 36 | def load_sensors() : 37 | sensors_folder = "./sensors" 38 | sensors = {} 39 | psensors = os.listdir(sensors_folder) 40 | 41 | for f in psensors : 42 | location = os.path.join(sensors_folder,f) 43 | 44 | if f[-5:] != ".json": continue 45 | 46 | s = json.loads(open(location,'r').read()) 47 | 48 | sensors[s["name"].lower()] = s 49 | 50 | if "function_file" in s.keys() : 51 | info = importlib.util.spec_from_file_location( s["name"], os.path.join(sensors_folder,s["function_file"]) ) 52 | p = importlib.util.module_from_spec(info) 53 | info.loader.exec_module(p) 54 | sensors[s["name"].lower()]["function"] = p.check 55 | 56 | return sensors 57 | 58 | def run_sensors(options,data,threshold=0.0) : 59 | sensors = load_sensors() 60 | pii = {} 61 | 62 | options = [x.lower() for x in options] 63 | 64 | if "all" in options : 65 | options = sensors.keys() 66 | 67 | data_u = unidecode.unidecode(data.lower()) 68 | for o in options : 69 | if o in sensors.keys() : 70 | print(sensors[o]["regex"]) 71 | sensor_regex = re.compile(sensors[o]["regex"]) 72 | data_regexed = re.findall(sensor_regex,data) 73 | probable = [] 74 | for d in data_regexed : 75 | if type(d) == tuple : 76 | d = ''.join(d) 77 | if "function" in sensors[o].keys() : 78 | f_probable = sensors[o]["function"](d) 79 | probable.extend( f_probable ) 80 | elif len(sensors[o]["keywords"]) == 0 : 81 | probable.append( d ) 82 | else : 83 | probable_distance = calculate_distance(data,data_u,d,sensors[o]["keywords"],threshold) 84 | if len(probable_distance) > 0 : 85 | probable.append( probable_distance ) 86 | if len(probable) > 0 : 87 | pii[o] = probable 88 | 89 | return pii 90 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | # Automatically generated by https://github.com/damnever/pigar. 2 | 3 | # PII-Identifier/pii.py: 9 4 | import-java == 0.6 5 | 6 | # PII-Identifier/pii_id.py: 1 7 | unidecode == 1.1.2 8 | -------------------------------------------------------------------------------- /sensors/address.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "Address", 3 | "description" : "Detects Addresses", 4 | "regex" : "(?:[A-Z]\\w+\\s){2,}", 5 | "keywords" : ["address","endereco"] 6 | } 7 | -------------------------------------------------------------------------------- /sensors/birthday.json: -------------------------------------------------------------------------------- 1 | { 2 | "name" : "Birthday", 3 | "description" : "Detects birthday dates", 4 | "regex" : "(\\d{2}(\\.|-|\\/)\\d{1,}(\\.|-|\\/)(\\d{4}|\\d{2}))", 5 | "keywords" : ["birthday","aniversario","data de nascimento","nascimento"] 6 | } 7 | -------------------------------------------------------------------------------- /sensors/cpf.json: -------------------------------------------------------------------------------- 1 | { 2 | "name" : "CPF", 3 | "description" : "Detects CPF", 4 | "regex" : "\\D\\d{11}\\D|\\d{9}-\\d{2}|\\d{3}\\.\\d{3}\\.\\d{3}-\\d{2}", 5 | "keywords" :[], 6 | "function_file" : "cpf.py" 7 | } 8 | -------------------------------------------------------------------------------- /sensors/cpf.py: -------------------------------------------------------------------------------- 1 | import re 2 | 3 | def check(data) : 4 | 5 | if not isinstance(data, list) : 6 | data = [data] 7 | 8 | maybe_cpfs_parsed = [ re.sub("[^0-9]","",c) for c in data ] 9 | 10 | true_cpfs = [] 11 | 12 | for c in maybe_cpfs_parsed : 13 | c1 = list(map(int,list(c))) 14 | c4 = c1[:9] 15 | c2 = [ c4[cc]*(10-cc) for cc in range(len(c4)) ] 16 | d1 = 11-(sum(c2)%11) 17 | c4.append(d1) 18 | c3 = [ c4[cc]*(11-cc) for cc in range(len(c4)) ] 19 | d2 = 11-(sum(c3)%11) 20 | c4.append(d2) 21 | if c == ''.join(map(str,c4)) : true_cpfs.append(c) 22 | 23 | return true_cpfs 24 | -------------------------------------------------------------------------------- /sensors/email.json: -------------------------------------------------------------------------------- 1 | { 2 | "name" : "Email", 3 | "description" : "Detecs emails", 4 | "regex" : "([a-zA-Z0-9._-]+@[a-zA-Z0-9._-]+\\.[a-zA-Z0-9_-]+)", 5 | "keywords" : [] 6 | } 7 | -------------------------------------------------------------------------------- /sensors/fullname.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "Fullname", 3 | "description" : "Detects Full Name", 4 | "regex" : "([A-Z][à-úa-zA-ZÀ-Ú]+\\s)([A-Z][à-úa-zA-ZÀ-Ú]+\\s?){1,}", 5 | "keywords" : ["nome","razao social","fullname"] 6 | } 7 | -------------------------------------------------------------------------------- /sensors/imei.json: -------------------------------------------------------------------------------- 1 | { 2 | "name" : "IMEI", 3 | "description" : "Detects IMEIS", 4 | "regex" : "\\D\\d{14}\\D|\\d{9}-\\d{2}|\\d{3}\\.\\d{3}\\.\\d{3}-\\d{2}", 5 | "keywords" : [] 6 | } 7 | -------------------------------------------------------------------------------- /sensors/latlong.json: -------------------------------------------------------------------------------- 1 | { 2 | "name" : "Latitude/Longitude", 3 | "description" : "Detects latitude and longitude", 4 | "regex" : "[-+]?([1-8]?\\d(\\.\\d+)?|90(\\.0+)?),\\s*[-+]?(180(\\.0+)?|((1[0-7]\\d)|([1-9]?\\d))(\\.\\d+)?)", 5 | "keywords" : ["long","lat","longitude","latitude"] 6 | } 7 | -------------------------------------------------------------------------------- /sensors/phonenumber.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "Phone number", 3 | "description" : "Detects phone number", 4 | "regex" : "(\\+\\d{1,2}\\s)?\\(\\d{2,3}\\)?[\\s.-](\\d{3,4}[\\s.-]\\d{4,}|\\d{8,10})", 5 | "keywords" : ["phone","phonenumber","telefone","cellphone","celular","contato","contact"] 6 | } 7 | -------------------------------------------------------------------------------- /sensors/rg.json: -------------------------------------------------------------------------------- 1 | { 2 | "name" : "RG", 3 | "description" : "Detects RGs", 4 | "regex" : "\\D\\d{9}\\D|\\d{2}\\.\\d{3}\\.\\d{3}-\\d{1}", 5 | "keywords" : [], 6 | "function_file" : "rg.py" 7 | } 8 | -------------------------------------------------------------------------------- /sensors/rg.py: -------------------------------------------------------------------------------- 1 | import re 2 | 3 | def check(data) : 4 | 5 | if not isinstance(data, list) : 6 | data = [data] 7 | 8 | maybe_rgs_parsed = [ re.sub("[^0-9]","",r) for r in data ] 9 | 10 | true_rgs = [] 11 | 12 | for r in maybe_rgs_parsed : 13 | r1 = list(map(int,list(r))) 14 | r4 = r1[:8] 15 | r2 = [ r4[rr]*(2+rr) for rr in range(len(r4)) ] 16 | d1 = 11-(sum(r2)%11) 17 | r4.append(d1) 18 | if r == ''.join(map(str,r4)) : true_rgs.append(r) 19 | 20 | return true_rgs 21 | -------------------------------------------------------------------------------- /sensors/ssn.json: -------------------------------------------------------------------------------- 1 | { 2 | "name" : "SSN", 3 | "description" : "Detects SSNs", 4 | "regex" : "(!666|000|9\\d{2})\\d{3}-(?!00)\\d{2}-(?!0{4})\\d{4}", 5 | "keywords" : ["ssn","social security number"] 6 | } 7 | --------------------------------------------------------------------------------