├── .flake8
├── .gitignore
├── LICENSE
├── README.md
├── ldif2bloodhound
│   ├── __init__.py
│   ├── __main__.py
│   └── parser.py
└── setup.py

/.flake8:
--------------------------------------------------------------------------------
[flake8]
show-source = True
builtins = unicode
max-line-length = 120
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
*.egg
*.egg-info
__pycache__
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
MIT License

Copyright (c) 2023 Adrian Vollmer, SySS Research

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
ldif2bloodhound
===============

Convert an LDIF file to JSON files ingestible by BloodHound.

The LDIF file should be retrieved like this with `ldapsearch`:

```console
$ for base in "" "CN=Schema,CN=Configuration," ; do \
    LDAPTLS_REQCERT=never ldapsearch \
      -H ldap:// \
      -D @corp.local \
      -w \
      -b "${base}DC=corp,DC=local" \
      -x \
      -o ldif-wrap=no \
      -E pr=1000/noprompt \
      -E '!1.2.840.113556.1.4.801=::MAMCAQc=' \
      -LLL \
      -ZZ \
      '(objectClass=*)' \
    ; done >> output_$(date +%s).ldif
```

In case StartTLS does not work, remove the `-ZZ` flag and replace
`ldap://` with `ldaps://`. Or leave it at `ldap://` if you like to live
dangerously.

The second `-E` argument passes the LDAP_SERVER_SD_FLAGS_OID control
(`1.2.840.113556.1.4.801`), which is needed so that ACLs are also dumped.

Then, the conversion works as follows:

```console
$ ldif2bloodhound output_*.ldif
```

For more options, run `ldif2bloodhound --help`.

The obvious limitation is that you won't get information about sessions or
local group memberships, just like with
[ADExplorerSnapshot.py](https://github.com/c3c/ADExplorerSnapshot.py).
Parsing LDIF data is roughly equivalent to running SharpHound with `-c DCOnly`
(perhaps even less).
[BloodHound.py](https://github.com/fox-it/BloodHound.py) is a better choice
to collect this data in most scenarios.
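
Before converting, a quick sanity check (an illustrative command, not part of
the tool) confirms that security descriptors actually made it into the dump:

```console
$ grep -ci 'ntsecuritydescriptor' output_*.ldif
```

If this prints 0, the ACL control passed via the second `-E` argument was most
likely not applied, and the resulting BloodHound data will be missing ACL
edges.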

Installation
------------

Install with this command:

```console
$ pip install git+https://github.com/SySS-Research/ldif2bloodhound
```

Copyright and License
---------------------

SySS GmbH, Adrian Vollmer. MIT Licensed.
--------------------------------------------------------------------------------
/ldif2bloodhound/__init__.py:
--------------------------------------------------------------------------------
__version__ = "0.0.0"
description = "Convert an LDIF file to JSON files ingestible by BloodHound"
--------------------------------------------------------------------------------
/ldif2bloodhound/__main__.py:
--------------------------------------------------------------------------------
def parse_args():
    import argparse

    parser = argparse.ArgumentParser(
        description=(
            """Convert an LDIF file to JSON files ingestible by BloodHound

Based on ADExplorerSnapshot.py. By Adrian Vollmer, SySS GmbH."""
        ),
        formatter_class=argparse.RawTextHelpFormatter,
    )

    parser.add_argument(
        "input",
        type=str,
        help="path to the input LDIF file",
    )

    parser.add_argument(
        "-o",
        "--output-dir",
        default=".",
        help="path to the output directory (default: %(default)s)",
    )

    args = parser.parse_args()

    return args


def main():
    import logging

    import pwnlib
    from adexpsnapshot import ADExplorerSnapshot
    from ldif2bloodhound.parser import LDIFSnapshot

    logging.basicConfig(handlers=[pwnlib.log.console])
    log = pwnlib.log.getLogger(__name__)
    log.setLevel(20)  # 20 == logging.INFO

    if pwnlib.term.can_init():
        pwnlib.term.init()
        log.term_mode = pwnlib.term.term_mode

    args = parse_args()

    ades = ADExplorerSnapshot(
        args.input,
        args.output_dir,
        log=log,
        snapshot_parser=LDIFSnapshot,
    )

    ades.outputBloodHound()


if __name__ == "__main__":
    main()
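

# Example invocation (illustrative; the file names are hypothetical). The
# installed `ldif2bloodhound` console script should behave the same as running
# the module directly:
#
#     python -m ldif2bloodhound output_1695000000.ldif -o ./bloodhound-json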
--------------------------------------------------------------------------------
/ldif2bloodhound/parser.py:
--------------------------------------------------------------------------------
import base64
import collections
import logging
import os
import struct
import sys

from ldif import LDIFParser
from requests.structures import CaseInsensitiveDict

log = logging.getLogger(__name__)


class SeekableLDIFParser(LDIFParser):
    """This subclass of LDIFParser can build an index for random access.

    This makes large LDIF files easier to handle. In this case, the parser
    needs a reference to the snapshot object. It will be passed to objects
    which are parsed from blocks, so they can access some extra information
    (in particular the `category` property).
    """

    def __init__(self, fp, snapshot, **kwargs):
        super().__init__(fp, **kwargs)
        self.snapshot = snapshot

    def build_index(self):
        """Build the index, a dict mapping each DN to its position in the file"""

        if self.byte_counter:
            raise RuntimeError("Index can only be built before first parsing")

        self._index = {}  # insertion order is preserved (Python 3.7+ dict)

        pos = 0
        for block in self._iter_blocks():
            first_line = block[0].partition(b"\n")[0]

            if first_line.startswith(b"dn: "):
                dn = first_line[4:].decode()
            elif first_line.startswith(b"dn:: "):
                # Base64-encoded DN
                dn = first_line[5:]
                dn = base64.b64decode(dn).decode()
            else:
                raise RuntimeError("Parsing error at position %d" % pos)

            self._index[dn] = pos
            pos = self.byte_counter

        self._input_file.seek(0)

    def __getitem__(self, dn):
        try:
            self._input_file.seek(self._index[dn])
        except AttributeError:
            raise RuntimeError("Index has not been built yet")

        block = next(self._iter_blocks())
        result = Object(self._parse_entry_record(block)[1], self.snapshot)

        return result

    def get_by_index(self, i):
        key = list(self._index.keys())[i]
        return self[key]


class Object(object):
    """Represents an LDAP object

    Must be sufficiently compatible with ADExplorerSnapshot objects"""

    def __init__(self, data, snapshot):
        self._data = CaseInsensitiveDict(data)
        self.snapshot = snapshot
        self.fix_attribute_types()

    def fix_attribute_types(self):
        """Everything is a string in LDIF, so convert as needed"""

        types = {
            "userAccountControl": int,
            "sAMAccountType": int,
            "systemFlags": int,
            "adminCount": int,
            "whenCreated": convert_timestamp,
            "objectSid": convert_sid,
            "objectGUID": convert_GUID,
        }

        for attr, _type in types.items():
            if attr in self._data:
                self._data[attr] = list(map(_type, self._data[attr]))

    def _category(self):
        # Copied mostly from ADExplorerSnapshot

        catDN = self.objectCategory
        if not catDN:
            return None

        catDN = catDN[0]
        catObj = self.snapshot.classes.get(catDN)
        if catObj:
            return catObj.cn[0].lower()
        else:
            return None

    def __getattr__(self, attr):
        # Quite hacky solution

        if attr.startswith("__") and attr.endswith("__"):
            raise AttributeError

        # This is a special attribute
        if attr == "category":
            return self._category()

        # ADExplorer sometimes uses different attribute names
        attr_map = {
            "classes": "objectClass",
            "schemaIDGUID": "objectGUID",
        }

        attr = attr_map.get(attr, attr)

        result = self._data.get(attr, [])
        return result

    def __getitem__(self, key):
        # This object wants to be accessed like an ldap3 object:
        # object['attributes'][key]

        if key == "attributes":
            return self._data
        elif key == "raw_attributes":
            # Seems to work like this
            return self._data
        else:
            raise AttributeError

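
# Illustrative sketch (not part of the original module): how SeekableLDIFParser
# provides random access without keeping the whole LDIF file in memory. The
# file name and DN are hypothetical.
#
#     with open("corp.ldif", "rb") as fp:
#         parser = SeekableLDIFParser(fp, snapshot=None)
#         parser.build_index()              # one pass, records byte offsets per DN
#         obj = parser["DC=corp,DC=local"]  # seek to and parse just that entry
#         first = parser.get_by_index(0)    # or access entries by position
#
# With snapshot=None, Object.category cannot be resolved; LDIFSnapshot below
# passes itself so that schema lookups work.
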
class LDIFSnapshot(object):
    """A class compatible with ADExplorerSnapshot's `Snapshot` class"""

    def __init__(self, path, log=None):
        fp = open(path, "rb")
        self._P = SeekableLDIFParser(fp, snapshot=self)
        self.path = path

    def parseHeader(self):
        self._P.build_index()
        Header = collections.namedtuple(
            "Header",
            "filetimeUnix server mappingOffset numObjects filetime".split(),
        )

        # We don't know these things; they are not included in the LDIF
        # file, but the dependency expects something here.
        filetime = os.path.getmtime(self.path)
        path = str(os.path.abspath(self.path)).replace(os.sep, "_")
        self.header = Header(
            filetimeUnix=filetime,
            server="ldifdump" + path,
            mappingOffset=0,
            numObjects=len(self._P._index),
            filetime=str(filetime),
        )

    def parseProperties(self):
        # This is done in parseClasses in one loop
        pass

    def parseClasses(self):
        self.classes = CaseInsensitiveDict()
        self.propertyDict = CaseInsensitiveDict()
        self.properties = []

        for obj in self.objects:
            # Mimic the behavior of ADExplorerSnapshot
            if "classSchema" in obj.classes:
                cn = obj.cn[0]
                dn = obj.distinguishedName[0]

                self.classes[cn] = obj
                self.classes[dn] = obj
                self.classes[dn.split(",")[0].split("=")[1]] = obj

            if "attributeSchema" in obj.classes:
                cn = obj.cn[0]
                dn = obj.distinguishedName[0]

                idx = len(self.properties)
                self.properties.append(obj)
                # abuse our dict for both DNs and the display name / cn
                self.propertyDict[cn] = idx
                self.propertyDict[dn] = idx
                self.propertyDict[dn.split(",")[0].split("=")[1]] = idx

    def parseObjectOffsets(self):
        # Not needed, we already have the offsets from `build_index`
        pass

    def getObject(self, i):
        obj = self._P.get_by_index(i)
        return obj

    @property
    def objects(self):
        for i in range(self.header.numObjects):
            obj = self.getObject(i)
            if obj:
                yield obj


def convert_GUID(guid):
    # objectGUID is 16 raw bytes; the first three GUID fields are stored
    # little-endian, hence the reordering below.
    order = [4, 3, 2, 1, 6, 5, 8, 7, 9, 10, 11, 12, 13, 14, 15, 16]
    result = ""

    for i in order:
        result += "%02x" % guid[i - 1]  # zero-pad each byte to two hex digits

    return result

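
# Worked example for convert_GUID (illustrative input): the first three GUID
# fields (4, 2 and 2 bytes) are stored little-endian in objectGUID, so the
# `order` list swaps them back into textual order.
#
#     convert_GUID(bytes(range(1, 17)))
#     # -> "0403020106050807090a0b0c0d0e0f10"
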
def convert_timestamp(date):
    """Convert an LDAP generalized time string to an integer Unix timestamp

    Example of input date: "20070828085401.0Z"
    """
    import datetime

    time_string = date.split(".")[0]
    time_object = datetime.datetime.strptime(time_string, "%Y%m%d%H%M%S")
    time_object = int(time_object.timestamp())

    return time_object


def convert_sid(sid):
    """Convert a binary SID returned from the LDAP query to a string version
    of the SID in the format of S-1-5-21-1270288957-3800934213-3019856503-500.
    This function was based on: http://www.gossamer-threads.com/lists/apache/bugs/386930

    Found here:
    https://gist.github.com/mprahl/e38a2eba6da09b2f6bd69d30fd3b749e
    This works better than the function from bloodhound.ad.utils. The latter
    crashes on short SIDs such as S-1-5-32-553.
    """
    if isinstance(sid, str):
        sid = sid.encode()
    # The revision level (typically 1)
    if sys.version_info.major < 3:
        revision = ord(sid[0])
    else:
        revision = sid[0]
    # The number of dashes minus 2
    if sys.version_info.major < 3:
        number_of_sub_ids = ord(sid[1])
    else:
        number_of_sub_ids = sid[1]
    # Identifier Authority Value (typically a value of 5 representing "NT Authority")
    # ">Q" is the format string. ">" specifies that the bytes are big-endian.
    # The "Q" specifies "unsigned long long" because 8 bytes are being decoded.
    # Since the actual SID section being decoded is only 6 bytes, we must precede it with 2 empty bytes.
    iav = struct.unpack(">Q", b"\x00\x00" + sid[2:8])[0]
    # The sub-ids include the Domain SID and the RID representing the object
    # '