├── LICENSE ├── README.md ├── chophound.ps1 ├── chophound.py └── replace.py /LICENSE: -------------------------------------------------------------------------------- 1 | BSD 3-Clause License 2 | 3 | Copyright (c) 2022, Arris Huijgen 4 | All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | 9 | 1. Redistributions of source code must retain the above copyright notice, this 10 | list of conditions and the following disclaimer. 11 | 12 | 2. Redistributions in binary form must reproduce the above copyright notice, 13 | this list of conditions and the following disclaimer in the documentation 14 | and/or other materials provided with the distribution. 15 | 16 | 3. Neither the name of the copyright holder nor the names of its 17 | contributors may be used to endorse or promote products derived from 18 | this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # ChopHound 2 | Some scripts for dealing with any challenges that might arise when importing (large) JSON datasets into BloodHound. The blog which discusses these scripts can be found at [https://blog.bitsadmin.com/blog/dealing-with-large-bloodhound-datasets](https://blog.bitsadmin.com/blog/dealing-with-large-bloodhound-datasets). 3 | 4 | ## Scripts 5 | | Name | Description | 6 | | ---- | ----------- | 7 | | chophound.ps1 | PowerShell implementation of ingesting a large BloodHound JSON file and splitting it into smaller chunks. Note that if the file you are trying to split is too large (or your PC's memory is too small), this script will fail with an out of memory exception. | 8 | | chophound.py | Python implementation of the .ps1 script which has support for splitting large JSON files into smaller chunks. | 9 | | replace.py | Little script to replace non-ASCII characters in the file provided with a question mark ('?') in order to avoid possible encoding errors. Note that when running this script against a file with a Byte-Order Mark at the beginning, those bytes will also simply be replaced by question marks and you will need to manually remove those bytes with a hex editor like [HxD](https://mh-nexus.de/en/hxd/) | 10 | -------------------------------------------------------------------------------- /chophound.ps1: -------------------------------------------------------------------------------- 1 | param ([Parameter(Mandatory)]$File, [int]$ChunkSize=5000) 2 | 3 | # Banner 4 | "ChopHoundPS v1.0 ( https://github.com/bitsadmin/chophound/ )" 5 | 6 | # Read file into memory 7 | Write-Warning "Reading file $File" 8 | $name_no_ext = [System.IO.Path]::GetFileNameWithoutExtension($File) 9 | $js = Get-Content -Raw $File | ConvertFrom-Json 10 | 11 | if(-not $?) 12 | { 13 | Write-Warning "Error while reading file. Quitting." 14 | return 15 | } 16 | 17 | # Determine data tag name 18 | $tagname = $js.meta.type 19 | if($js.meta.version -gt 3) 20 | { $tagname = 'data' } 21 | 22 | # Calculate number of blocks 23 | $numblocks = [Math]::Ceiling($js.data.Count / $ChunkSize) 24 | Write-Warning "Splitting in $numblocks blocks of $ChunkSize elements" 25 | $i = 0 26 | 27 | # Perform splitting 28 | while($i*$ChunkSize -lt $js.data.Count) 29 | { 30 | $outname = "{0}_{1:0000}.json" -f $name_no_ext,$i 31 | 32 | # meta -> count 33 | $meta = $js.meta 34 | $meta.count = $ChunkSize 35 | if(($i+1)*$ChunkSize -gt $js.data.Count) 36 | { $meta.count = $js.data.Count - ($i*$ChunkSize)} 37 | 38 | Write-Warning "Writing file $outname" 39 | 40 | # Meta tag MUST be after the data, otherwise BloodHound won't find it 41 | '{{"{0}":{1},"meta":{2}}}' -f ` 42 | $tagname, ` 43 | ($js.data[($i*$ChunkSize)..((($i+1)*$ChunkSize)-1)] | ConvertTo-Json -Depth 100 -Compress), ` 44 | ($meta | ConvertTo-Json -Compress) ` 45 | | Out-File $outname -NoNewline -Encoding UTF8BOM 46 | 47 | $i++ 48 | } -------------------------------------------------------------------------------- /chophound.py: -------------------------------------------------------------------------------- 1 | import os 2 | import re 3 | import argparse 4 | import json 5 | try: 6 | import ijson.backends.python as ijson 7 | except (ImportError, ModuleNotFoundError): 8 | print('Error: install the Python ijson module first') 9 | 10 | VERSION = 1.0 11 | 12 | def main(args): 13 | # Collect args 14 | file = args.file 15 | chunksize = args.chunksize 16 | 17 | # Fixed variables 18 | basename = os.path.splitext(os.path.basename(file))[0] 19 | jsonformat = '{"data":[%s],"meta":%s}' 20 | 21 | # Open in binary mode to seek 22 | print('[+] Opening file %s' % file) 23 | with open(file, 'rb') as js: 24 | # Obtain meta tag 25 | js.seek(-0x100, os.SEEK_END) 26 | lastbytes = str(js.read(0x100)) 27 | if args.verbose: 28 | print(f"lastbytes: {lastbytes}") 29 | metatagstr = re.search('("meta":(\s+)?{.*})', lastbytes, re.IGNORECASE).group(1).replace('\\n',"") 30 | if args.verbose: 31 | print(metatagstr) 32 | metatag = json.loads('{' + metatagstr) 33 | 34 | # Open in text mode to parse 35 | with open(file, 'r', encoding='utf-8-sig', errors='replace') as js: 36 | items = ijson.items(js, 'data.item') 37 | 38 | endoflist = False 39 | i = 0 40 | while True: 41 | outfile = '%s_%.4d.json' % (basename, i) 42 | 43 | # Get chunk 44 | chunks = [] 45 | count = 0 46 | try: 47 | while True: 48 | item = next(items) 49 | chunks.append(json.dumps(item)) 50 | 51 | count += 1 52 | if count == chunksize: 53 | break 54 | except StopIteration: 55 | endoflist = True 56 | 57 | # Update meta tag 58 | metatag['meta']['count'] = count 59 | 60 | # Format and store 61 | print('[+] Writing %s' % outfile) 62 | with open(outfile, 'w', encoding='utf-8-sig', errors='replace') as jsout: 63 | jsout.write(jsonformat % (','.join(chunks), json.dumps(metatag['meta']))) 64 | 65 | i += 1 66 | 67 | if endoflist: 68 | break 69 | 70 | def getargs(): 71 | parser = argparse.ArgumentParser( 72 | description='Convert large BloodHound json to smaller chunks' 73 | ) 74 | parser.add_argument('file', help='JSON file to split') 75 | parser.add_argument('-c', '--chunksize', default=10000, type=int, dest='chunksize', help='Number of items per outputted chunk') 76 | parser.add_argument('-v', '--verbose', action=argparse.BooleanOptionalAction, help='Show verbose output') 77 | 78 | return parser.parse_args() 79 | 80 | if __name__ == '__main__': 81 | print('ChopHound v%.2f ( https://github.com/bitsadmin/chophound/ )' % VERSION) 82 | main(getargs()) 83 | -------------------------------------------------------------------------------- /replace.py: -------------------------------------------------------------------------------- 1 | import mmap 2 | import argparse 3 | 4 | VERSION = 1.0 5 | 6 | def main(args): 7 | file = args.file 8 | verbose = args.verbose 9 | crapbytes = [] 10 | 11 | print('Locating non-ASCII characters in %s' % file) 12 | with open(file, 'r+b') as f: 13 | mem = mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ) 14 | offset = 0 15 | for byte in mem: 16 | obyte = ord(byte) 17 | if (obyte < 0x20 or obyte > 0x7e) and obyte not in (0x0a, 0x0d): 18 | crapbytes.append(offset) 19 | if verbose: 20 | print("Found non-ASCII character at offset 0x%.8x" % offset) 21 | 22 | offset += 1 23 | 24 | mem.close() 25 | print('Found a total of %d non-ASCII characters' % len(crapbytes)) 26 | 27 | print('Fixing non-ASCII characters in %s' % file) 28 | with open(file, 'r+b') as f: 29 | mem = mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_WRITE) 30 | 31 | # Navigate to offset and write question mark 32 | for offset in crapbytes: 33 | if verbose: 34 | print("Writing '?' to offset 0x%.8x" % offset) 35 | mem.seek(offset) 36 | mem.write_byte(0x3f) 37 | 38 | mem.close() 39 | print('Fixed a total of %d non-ASCII characters' % len(crapbytes)) 40 | 41 | def getargs(): 42 | parser = argparse.ArgumentParser( 43 | description='In-place replacement of non-ASCII characters to question marks' 44 | ) 45 | parser.add_argument('file', help='File to replace non-ASCII characters in') 46 | parser.add_argument('-v', '--verbose', dest='verbose', action='store_true', default=False, help='Show verbose output') 47 | 48 | return parser.parse_args() 49 | 50 | if __name__ == '__main__': 51 | print('ReplaceNonASCII v%.2f ( https://github.com/bitsadmin/chophound/ )' % VERSION) 52 | main(getargs()) 53 | --------------------------------------------------------------------------------