└── visualization ├── README.md ├── graph.gpi ├── hex2mult.py ├── log2plot.py ├── mult2hex.py ├── mult2kad.py ├── multihash ├── __init__.py ├── codecs.py ├── funcs.py ├── multihash.py └── version.py ├── plot └── shrink.py /visualization/README.md: -------------------------------------------------------------------------------- 1 | # Visualization of IPFS DHT queries 2 | These scripts enable the visualization of the connections timeline during an IPFS DHT query. 3 | 4 | ## Use instructions 5 | 6 | ### Step 1. Start the IPFS daemon on your machine 7 | 8 | Assuming you have already installed IPFS, you start the IPFS daemon by executing: 9 | 10 | ```bash 11 | ipfs daemon 12 | ``` 13 | 14 | ### Step 2. Run an IPFS DHT query 15 | 16 | To run an IPFS DHT query, you normally execute: 17 | 18 | ```bash 19 | ipfs dht query <CID> 20 | ``` 21 | 22 | In order to visualize the connections opened and used during a query, you should also provide the verbose flag (-v) and redirect the standard output into a file, say query.log. Here's an example, including a sample CID: 23 | 24 | ```bash 25 | ipfs dht query -v QmefYbmED9E1cw3NEqtxKDmVrzk3Z351ZDgwzwKdQ4Ajbj > query.log 26 | ``` 27 | 28 | 29 | 30 | ### Step 3. Parse the query log 31 | 32 | To parse the log produced in the previous step, you should execute: 33 | 34 | ```bash 35 | cat query.log | ./log2plot.py > DATA 36 | ``` 37 | 38 | This will parse the query log, and will output visualization data to be consumed by gnuplot. We redirect that output into a file called DATA, which the gnuplot script expects to find. 39 | 40 | ### Step 4. Visualize the data 41 | 42 | At this final step, you execute [gnuplot](http://www.gnuplot.info/) to produce the visualization. You execute: 43 | 44 | ```bash 45 | gnuplot graph.gpi 46 | ``` 47 | 48 | This will pop up a window with the timeline of connections that took place during the query. 
49 | 50 | You may optionally click on the top left button in the visualization window to export the plot as a PDF or image. 51 | 52 | 53 | -------------------------------------------------------------------------------- /visualization/graph.gpi: -------------------------------------------------------------------------------- 1 | ################################################### 2 | # # 3 | # Written by Spyros Voulgaris (voulgaris@aueb.gr) # 4 | # April 2021 # 5 | # # 6 | # Mobile Multimedia Lab # 7 | # Athens University of Economics and Business # 8 | # Athens, Greece # 9 | # # 10 | ################################################### 11 | 12 | #set term png size 1920,1200 13 | #set term png size 1600,900 14 | #set terminal pdfcairo enhanced size 32cm,18cm font ",8" 15 | #set terminal pdfcairo enhanced font ",8" 16 | #set term postscript eps enhanced color 13 17 | #set size 1.6,1.2 18 | #set size 2,2 19 | set term qt 20 | 21 | min(a,b) = a<b ? a : b 22 | max(a,b) = a>b ? a : b 23 | max3(a,b,c) = max(max(a,b),max(a,c)) 24 | 25 | set key opaque 26 | 27 | set xlabel "time (sec)" 28 | 29 | set xrange [0:*] 30 | set yrange [] reverse 31 | set xtics 1 32 | set ytics 1 33 | set grid 34 | 35 | plot "DATA" index 0 using 2:1:($3-$2):(0) with vectors nohead lw 2 lc 4 title "dial", \ 36 | "DATA" index 0 using 4:1 with points pt 2 ps 1.5 lw 2 lc 7 title "dial error", \ 37 | "DATA" index 0 using (min($5,$6)):1:($6-$5):(0) with vectors nohead lw 6 lc 3 title "query", \ 38 | "DATA" index 0 using (min($5,$7)):1:($7-$5):(0):5 with vectors nohead lw 6 lc rgb '#A056B4E9' title "unfinished query", \ 39 | "DATA" index 0 using 2:1 with points pt 7 ps 0.6 lc 4 notitle, \ 40 | "DATA" index 1 using 1:2:($3-$1):($4-$2) with vectors filled lc '#20C7162B' lw 1.5 dt 2 title "causality", \ 41 | "DATA" index 1 using 1:2 with points lc '#20C7162B' pt 7 ps 0.6 notitle, \ 42 | "DATA" index 0 using (max3($3,$6,$7)):1:(sprintf(" %s",stringcolumn(8))):($9==0?8:2) with labels textcolor variable left notitle, \ 43 | "DATA" index 2 
using 1:(0):(0):2 with vectors nohead lw 3 dt 4 lc 8 title "context canceled", \ 44 | "DATA" index 3 using 1:(0):(0):2 with vectors nohead lw 3 dt 4 lc 2 title "results finalized" 45 | 46 | pause -1 47 | -------------------------------------------------------------------------------- /visualization/hex2mult.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import os 4 | import sys 5 | import multihash 6 | import hashlib 7 | from typing import Union 8 | import base58 9 | 10 | 11 | def printb(bytes_array): 12 | print(''.join(format(x, '02x') for x in bytes_array)) 13 | 14 | 15 | def hex_to_multihash(hexString: str) -> str: 16 | """Converts hex string to base-58 multihash""" 17 | b = bytearray.fromhex(hexString) 18 | b = b.rjust(32, b'\0') 19 | 20 | prefix = int.to_bytes(18,1,'big') + int.to_bytes(32,1,'big') 21 | 22 | b = prefix+b 23 | 24 | multStr = base58.b58encode(bytes(b)) 25 | return multStr.decode('utf-8') 26 | 27 | 28 | for line in sys.stdin.readlines(): 29 | line = line.strip() 30 | 31 | h = hex_to_multihash(line) 32 | print(h) 33 | 34 | -------------------------------------------------------------------------------- /visualization/log2plot.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | ################################################### 4 | # # 5 | # Written by Spyros Voulgaris (voulgaris@aueb.gr) # 6 | # April 2021 # 7 | # # 8 | # Mobile Multimedia Lab # 9 | # Athens University of Economics and Business # 10 | # Athens, Greece # 11 | # # 12 | ################################################### 13 | 14 | import sys 15 | import re 16 | from datetime import datetime 17 | from datetime import timedelta 18 | import collections 19 | import base58 20 | import hashlib 21 | 22 | 23 | 24 | 25 | #### 26 | # Global variables 27 | #### 28 | 29 | ID_LEN = 6 30 | 31 | requests = collections.defaultdict(dict) 32 | order = [] 33 | 
causality = []

startTime = None
endTime = None
lastResponse = None

context_canceled = collections.defaultdict(int)

target_determined = collections.defaultdict(int)

unmatched_lines = []



####
# Function that converts a base-58 multihash to the hex SHA-256 digest
# of its decoded bytes
####

def get_hex(b58_encoded_peer_id_str: str) -> str:
    """Return the hex SHA-256 digest of a base-58 encoded multihash.

    The base-58 string is decoded to raw bytes and those bytes are hashed
    with SHA-256.  The returned value is therefore the hash *of* the
    multihash (what mult2kad.py calls the Kademlia ID), not a plain hex
    dump of the multihash itself.
    """
    decoded = base58.b58decode(b58_encoded_peer_id_str)
    return hashlib.sha256(decoded).hexdigest()



####
# The handle_ functions are called by the main loop when
# a line matching the respective event's regex is found.
#
# Every handler receives the time of the log line and the regex match
# object; group 2 of the match is the multihash of the peer concerned.
####

def handle_querying(time, match):
    """Record that a query towards the matched peer started at `time`."""
    peer = get_hex(match.group(2))

    if peer not in order:
        order.append(peer)

    events = requests[peer]
    events['query_start'] = time

    # 'query_end' is deliberately left unset here: the output stage treats
    # a query that has a start but no end as an unfinished query.

    # if a dialing had started, record that it just ended
    if 'dial_start' in events:
        events['dial_end'] = time

def handle_dialing(time, match):
    """Record that dialing the matched peer started at `time`.

    If some response has been seen before, a causality entry linking that
    last response to this dial is appended as well.
    """
    peer = get_hex(match.group(2))

    if peer not in order:
        order.append(peer)

    events = requests[peer]
    events['dial_start'] = time

    # set dial_end to this time too, in case it never ends
    events['dial_end'] = time

    if lastResponse is not None:
        causality.append((lastResponse[0], lastResponse[1], peer, time))

def handle_says_use(time, match):
    """Record that the matched peer answered its query at `time`."""
    global lastResponse
    peer = get_hex(match.group(2))
    requests[peer]['query_end'] = time
    lastResponse = (peer, time)

def handle_dial_error(time, match):
    """Record that dialing the matched peer failed at `time`."""
    global lastResponse
    peer = get_hex(match.group(2))
    events = requests[peer]
    events['dial_error'] = time
    events['dial_end'] = time
    lastResponse = (peer, time)

def handle_context_canceled(time, match):
    """Count one more 'context canceled' error reported at `time`."""
    context_canceled[time] += 1

def handle_target_found(time, match):
    """Record that the matched peer was reported as a target at `time`."""
    global endTime
    endTime = time
    target_determined[time] += 1
    requests[get_hex(match.group(2))]['in_targets'] = True



####
# The following function returns the 'relative time', i.e.,
# the time in seconds since the beginning of this query,
# at which 'event_type' occurred for this 'peer'.
#
# An event that was never recorded for the peer is reported
# as one second before the start of the query.
####

def relative_time(peer, event_type):
    # placeholder used when the peer has no such event
    before_start = startTime - timedelta(seconds=1)

    moment = requests[peer].get(event_type, before_start)
    return (moment - startTime).total_seconds()



####
# REGULAR EXPRESSIONS
#
# The following statements define the regular expressions
# to use for parsing logs.
#
# The regex dict associates to each event type a tuple (regex, handler),
# containing the regex to detect that event and the handler function.
####

# Time format (HH:MM:SS.mmm).  The dot is escaped so that only strings
# which datetime.strptime() can parse below are accepted as timestamps.
re_time = r'(\d\d:\d\d:\d\d\.\d\d\d)'

# Multihashes
re_Qm = r'Qm\w{44}'              # Qm multihash
re_12D3KooW = r'12D3KooW\w{44}'  # 12D3KooW multihash
re_multihash = '(' + re_Qm + '|' + re_12D3KooW + ')'

# Lines
# Patterns are tried in insertion order and the first one that matches wins.
regex = {}
regex['querying'] = (re_time + r': \* querying ' + re_multihash, handle_querying)
regex['dialing'] = (re_time + r': dialing peer: ' + re_multihash, handle_dialing)
regex['says_use'] = (re_time + r': \* ' + re_multihash + r' says use(?: ' + re_multihash + r')*', handle_says_use)
regex['dial_error'] = (re_time + r': error: failed to dial ' + re_multihash + r': all dials failed', handle_dial_error)
regex['context_canceled'] = (re_time + r': error: context canceled$', handle_context_canceled)
regex['target_found'] = (re_time + r': ' + re_multihash, handle_target_found)

# Compile everything once instead of re-matching from strings on every line.
compiled_time = re.compile(re_time)
compiled_regex = [(re.compile(pattern), handler) for (pattern, handler) in regex.values()]



####
# The main loop of the parser, iterating through stdin lines, one at a time.
# For each line, it first tries to match a time regex in the beginning.
# Then, it loops through all patterns defined in the regex dict, and if
# one is found, it calls the respective handler function.
####

lineNum = 0

# Lines seen before the first timestamp have no time at all; they are
# recorded with time None instead of failing on an unbound variable.
time = None

for line in sys.stdin:
    line = line.strip()
    lineNum += 1

    # First, parse time, which is common for most lines.
    # If a line does not report a time keep the previous time,
    # as it probably refers to the same event.
    match = compiled_time.match(line)
    if match:
        time = datetime.strptime(match.group(1), '%H:%M:%S.%f')
        if startTime is None:
            startTime = time

    # Check all regular expressions for a match.  Every pattern starts with
    # the time regex, so a handler is only ever called with a real time.
    for (pattern, handler) in compiled_regex:
        m = pattern.match(line)
        if m:
            handler(time, m)
            break
    else:
        unmatched_lines.append((lineNum, time, line))



####
# Parsing is complete. Let's output the results!
#
# First output for each peer contacted the times for starting/ending
# the respective dialing and querying, as well as whether there was
# an error with dialing, and whether that peer was eventually
# among the set of K peers selected as targets.
####

peerIndex = {}
numRequests = len(order)

# One name per printed column (the rows below have nine columns).
print('#hop\tdial_st\tdial_end\tdial_err\tquery_st\tquery_end\tquery_unf\tpeer_hash\ttarget')

for hop, peer in enumerate(order, start=1):

    dial_start = relative_time(peer, 'dial_start')
    dial_end = relative_time(peer, 'dial_end')
    dial_error = relative_time(peer, 'dial_error')

    query_start = relative_time(peer, 'query_start')
    query_end = relative_time(peer, 'query_end')

    in_targets = int('in_targets' in requests[peer])

    # A missing event is reported as -1, so a started query without an end
    # has query_start > query_end, i.e., it started but never ended.
    if query_start > query_end and endTime is not None:
        query_unfinished = (endTime - startTime).total_seconds()
    else:
        query_unfinished = -1

    print('%d\t%.2f\t%.2f\t%.2f\t%.2f\t%.2f\t%.2f\t%s\t%d' % (hop, dial_start, dial_end, dial_error, query_start, query_end, query_unfinished, peer[0:ID_LEN], in_targets))

    peerIndex[peer] = hop



####
# Output the causality relations between events,
# namely providing coordinates for the respective red arrows in the plot.
#
# The causality relations are determined in a best effort way,
# which is *not* error-proof.
####

print('\n\n#Causality')
print('#timeA\tpeerA\ttimeB\tpeerB')

for (peer1, time1, peer2, time2) in causality:
    rel_time1 = (time1 - startTime).total_seconds()
    rel_time2 = (time2 - startTime).total_seconds()

    # Peers without a row in the first table fall back to the
    # top (0) or bottom (numRequests) edge of the plot.
    index1 = peerIndex.get(peer1, 0)
    index2 = peerIndex.get(peer2, numRequests)

    print('%f\t%d\t%f\t%d' % (rel_time1, index1, rel_time2, index2))



####
# Output the times when 'context canceled' events occurred.
# These are plotted as thick vertical dashed lines in black.
####

print('\n\n#Context canceled')
print('#time\tnumPeers\tlabel')

for event_time, count in context_canceled.items():
    rel_time = (event_time - startTime).total_seconds()
    print('%.2f\t%.2f\t%d' % (rel_time, len(order)+1, count))



####
# Output the times when peers are reported as selected targets.
# These are plotted as thick vertical dashed lines in green.
####

print('\n\n#Time(s) when the K closest peers to the target ID were determined')
print('#time\tnumPeers\tlabel')

for event_time, count in target_determined.items():
    rel_time = (event_time - startTime).total_seconds()
    print('%.2f\t%.2f\t%d' % (rel_time, len(order)+1, count))



####
# Finally, output the list of lines that have *not* matched any
# regex pattern during parsing.
#
# These lines are not included in the plots, but they are very useful
# when trying to interpret the logs in full detail, and to see
# what has been left out in special cases, for future improvements
# of this parser.
313 | #### 314 | 315 | print('\n\n#Lines ignored while parsing') 316 | 317 | for line in unmatched_lines: 318 | dtStr = '' 319 | 320 | if line[1] != None: 321 | dt = (line[1] - startTime).total_seconds() 322 | dtStr = ' (time: %.2f)' % dt 323 | print('%d%s: %s' % (line[0], dtStr, line[2]) ) 324 | -------------------------------------------------------------------------------- /visualization/mult2hex.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import os 4 | import sys 5 | import multihash 6 | import hashlib 7 | from typing import Union 8 | import base58 9 | 10 | 11 | def printb(bytes_array): 12 | return ''.join(format(x, '02x') for x in bytes_array) 13 | 14 | 15 | def multihash_to_hex(b58_encoded_peer_id_str: str) -> str: 16 | """Converts base-58 multihash to hex representation""" 17 | bytes = base58.b58decode(b58_encoded_peer_id_str) 18 | bytes = bytes[2:] 19 | #sha256 = hashlib.sha256(bytes).digest() 20 | return printb(bytes) 21 | 22 | 23 | for line in sys.stdin.readlines(): 24 | line = line.strip() 25 | 26 | h = multihash_to_hex(line) 27 | print(h) 28 | 29 | -------------------------------------------------------------------------------- /visualization/mult2kad.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import os 4 | import sys 5 | import multihash 6 | import hashlib 7 | from typing import Union 8 | import base58 9 | 10 | 11 | 12 | def multihash_to_kademlia(b58_encoded_peer_id_str: str) -> str: 13 | """Converts base-58 multihash to kademlia ID in hex representation""" 14 | bytes = base58.b58decode(b58_encoded_peer_id_str) 15 | sha256 = hashlib.sha256(bytes).digest() 16 | return sha256.hex() 17 | 18 | 19 | for line in sys.stdin.readlines(): 20 | line = line.strip() 21 | 22 | h = multihash_to_kademlia(line) 23 | print(h) 24 | 25 | -------------------------------------------------------------------------------- 
/visualization/multihash/__init__.py: -------------------------------------------------------------------------------- 1 | # pymultihash: Python implementation of the multihash specification 2 | # 3 | # Initial author: Ivan Vilata-i-Balaguer 4 | # License: MIT 5 | 6 | """Python implementation of the multihash specification 7 | 8 | This is an implementation of the `multihash`_ specification in Python. 9 | The main component in the module is the `Multihash` class, a named tuple that 10 | represents a hash function and a digest created with it, with extended 11 | abilities to work with hashlib-compatible hash functions, verify the integrity 12 | of data, and encode itself to a byte string in the binary format described in 13 | the specification (possibly ASCII-encoded). The `decode()` function can be 14 | used for the inverse operation, i.e. converting a (possibly ASCII-encoded) 15 | byte string into a `Multihash` object. 16 | 17 | .. _multihash: https://github.com/jbenet/multihash 18 | 19 | Basic usage 20 | =========== 21 | 22 | Decoding 23 | -------- 24 | 25 | One of the basic cases happens when you have a multihash-encoded digest like: 26 | 27 | >>> mhash = b'EiAsJrRraP/Gj/mbRTwdMEE0E0ItcGSDv6D5il6IYmbnrg==' 28 | 29 | You know beforehand that the multihash is Base64-encoded. You also have some 30 | data and you want to check if it matches that digest: 31 | 32 | >>> data = b'foo' 33 | 34 | To perform this check, you may first *decode* the multihash (i.e. parse it) 35 | into a `Multihash` object, which provides the ``verify()`` method to validate 36 | the given byte string against the encoded digest: 37 | 38 | >>> import multihash 39 | >>> mh = multihash.decode(mhash, 'base64') 40 | >>> mh.verify(data) 41 | True 42 | 43 | Please note that you needed to specify that the multihash is Base64-encoded, 44 | otherwise binary encoding is assumed (and the decoding will probably fail). 
45 | The verification internally uses a hashlib-compatible implementation of the 46 | function indicated by the encoded multihash to check the data. Read more 47 | about codecs and hash functions further below. 48 | 49 | The function in a `Multihash` object is stored as a member of the `Func` 50 | enumeration, which contains one member per function listed in the `multihash`_ 51 | specification. The name of a `Func` member is the name of that function in 52 | the specification (with hyphens replaced by underscores), and its value is the 53 | function code. The `Multihash` object also contains the binary string with 54 | the raw hash digest. Application-specific hash functions are also supported, 55 | but their numeric code is used instead of a `Func` member. 56 | 57 | >>> mh # doctest: +ELLIPSIS 58 | Multihash(func=<Func.sha2_256: 18>, digest=b'...') 59 | >>> hex(mh.func.value) 60 | '0x12' 61 | >>> len(mh.digest) 62 | 32 63 | 64 | The short representation of a `Multihash` object only shows the function name 65 | (or its code if application-specific), and the Base64-encoded version of the 66 | raw hash digest: 67 | 68 | >>> print(mh) 69 | Multihash(sha2_256, b64:LCa0a2j/xo/5m0U8HTBBNBNCLXBkg7+g+YpeiGJm564=) 70 | 71 | If you need a shorter multihash, you may truncate it while keeping the initial 72 | bytes of the raw hash digest. A byte string validates against a truncated 73 | multihash if its digest bytes match the initial bytes of the string's hash: 74 | 75 | >>> mh_trunc = mh.truncate(16) 76 | >>> print(mh_trunc) 77 | Multihash(sha2_256, b64:LCa0a2j/xo/5m0U8HTBBNA==) 78 | >>> mh_trunc.verify(data) 79 | True 80 | 81 | Encoding 82 | -------- 83 | 84 | Now imagine that you have some data and you want to create a multihash out of 85 | it. First you must create a `Multihash` instance with the desired function 86 | and the computed binary digest. 
If you already know them, you may create the 87 | `Multihash` instance directly: 88 | 89 | >>> mh = multihash.Multihash(multihash.Func.sha2_512, b'...') 90 | >>> print(mh) # doctest: +ELLIPSIS 91 | Multihash(sha2_512, b64:...) 92 | 93 | Instead of the `Func` member, you may find more comfortable to use the 94 | function name (e.g. ``'sha2-512'`` or ``'sha2_512'``) or its code (e.g. ``19`` 95 | or ``0x13``). Or you may create `Multihash` instances straight from 96 | hashlib-compatible objects: 97 | 98 | >>> import hashlib 99 | >>> hash = hashlib.sha1(data) 100 | >>> mh = Multihash.from_hash(hash) 101 | >>> print(mh) 102 | Multihash(sha1, b64:C+7Hteo/D9vJXQ3UfzxbwnXaijM=) 103 | 104 | However the easiest way to get a `Multihash` instance is with the `digest()` 105 | function, which internally uses a hashlib-compatible implementation of the 106 | indicated function to do the job for you: 107 | 108 | >>> mh = multihash.digest(data, 'sha1') 109 | >>> print(mh) 110 | Multihash(sha1, b64:C+7Hteo/D9vJXQ3UfzxbwnXaijM=) 111 | 112 | In any case, getting the multihash-encoded digest is very simple: 113 | 114 | >>> mh.encode('base64') 115 | b'ERQL7se16j8P28ldDdR/PFvCddqKMw==' 116 | 117 | As before, an encoding (Base64) was specified to avoid getting the binary 118 | version of the multihash. 119 | 120 | The hash function registry 121 | ========================== 122 | 123 | As the multihash specification indicates, you may use hash function codes in 124 | the range 0x00-0x0f to specify application-specific hash functions. 
125 | The `decode()` function allows such multihashes, and the `Multihash` 126 | constructor allows specifying such hash functions by their integer code: 127 | 128 | >>> import multihash 129 | >>> import hashlib 130 | >>> data = b'foo' 131 | >>> mh = multihash.Multihash(0x05, hashlib.md5(data).digest()) 132 | >>> print(mh) # doctest: +ELLIPSIS 133 | Multihash(0x5, b64:rL0Y20zC+Fzt72VPzMSk2A==) 134 | 135 | However this does not allow using more intuitive strings instead of numbers 136 | for application-specific functions, and digesting or verifying with such a 137 | function is not possible: 138 | 139 | >>> multihash.digest(data, 'md5') 140 | Traceback (most recent call last): 141 | ... 142 | KeyError: ('unknown hash function', 'md5') 143 | >>> mh.verify(data) 144 | Traceback (most recent call last): 145 | ... 146 | KeyError: ('unknown hash function', 5) 147 | 148 | The `FuncReg` class helps work around these problems by providing a registry 149 | of hash functions. You may add your application-specific hash functions there 150 | with a code, a name, and optionally a name and a callable object for 151 | hashlib-compatible operations: 152 | 153 | >>> multihash.FuncReg.register(0x05, 'md-5', 'md5', hashlib.md5) 154 | >>> multihash.digest(data, 'md-5') # doctest: +ELLIPSIS 155 | Multihash(func=5, digest=b'...') 156 | >>> mh.verify(data) 157 | True 158 | 159 | You may remove your application-specific functions from the registry as well: 160 | 161 | >>> multihash.FuncReg.unregister(0x05) 162 | 163 | `FuncReg` also allows you to iterate over registered functions (as `Func` 164 | members or function codes), and check if it contains a given function 165 | (i.e. whether the `Func` or code is registered or not). 
166 | 167 | >>> [f.name for f in multihash.FuncReg if f == multihash.Func.sha3] 168 | ['sha3_512'] 169 | >>> 0x05 in multihash.FuncReg 170 | False 171 | 172 | The codec registry 173 | ================== 174 | 175 | Although a multihash is properly a binary packing format for a hash digest, it 176 | is not normally exchanged in binary form, but in some ASCII-encoded 177 | representation of it. As seen above, multihash decoding and encoding calls 178 | support an ``encoding`` argument to allow ASCII decoding or encoding for 179 | your convenience. 180 | 181 | The encodings mentioned in the multihash standard are already enabled and 182 | available by using their name (a string) as the ``encoding`` argument. 183 | The ``base58`` encoding needs that the ``base58`` package is 184 | installed, though. 185 | 186 | The ``CodecReg`` class allows you to access the available codecs and register 187 | your own ones (or replace existing ones) with a name and encoding and decoding 188 | callables that get and return byte strings. For instance, to add the uuencode 189 | codec: 190 | 191 | >>> import multihash 192 | >>> import binascii 193 | >>> multihash.CodecReg.register('uu', binascii.b2a_uu, binascii.a2b_uu) 194 | 195 | To use it: 196 | 197 | >>> mhash = b'6$10+[L>UZC\\\\/V\\\\E=#=1_/%O"==J*,P \\n' 198 | >>> mh = multihash.decode(mhash, 'uu') 199 | >>> print(mh) 200 | Multihash(sha1, b64:C+7Hteo/D9vJXQ3UfzxbwnXaijM=) 201 | >>> mh.encode('uu') == mhash 202 | True 203 | 204 | You may remove any codec from the registry as well: 205 | 206 | >>> multihash.CodecReg.unregister('uu') 207 | 208 | `CodecReg` also allows you to iterate over registered codec names, and check 209 | if it contains a given codec (i.e. whether it is registered or not). 
210 | 211 | >>> {'hex', 'base64'}.issubset(multihash.CodecReg) 212 | True 213 | >>> 'base32' in multihash.CodecReg 214 | True 215 | """ 216 | 217 | from multihash.version import __version__ # noqa 218 | from multihash.funcs import Func, FuncReg # noqa 219 | from multihash.codecs import CodecReg # noqa 220 | from multihash.multihash import Multihash, digest, decode # noqa 221 | 222 | __all__ = [ 223 | '__version__', 224 | 'Func', 'FuncReg', 225 | 'CodecReg', 226 | 'Multihash', 'digest', 'decode', 227 | ] 228 | -------------------------------------------------------------------------------- /visualization/multihash/codecs.py: -------------------------------------------------------------------------------- 1 | # pymultihash: Python implementation of the multihash specification 2 | # 3 | # Initial author: Ivan Vilata-i-Balaguer 4 | # License: MIT 5 | 6 | """Codec registry""" 7 | 8 | from collections import namedtuple 9 | 10 | # Import codecs mentioned in the multihash spec. 11 | import binascii 12 | import base64 13 | 14 | # Try to import external codecs mentioned in the multihash spec. 15 | try: 16 | import base58 17 | except ImportError: 18 | base58 = None 19 | 20 | 21 | class _CodecRegMeta(type): 22 | def __contains__(self, encoding): 23 | """Return whether `encoding` is a registered codec. 24 | 25 | >>> CodecReg.reset() 26 | >>> 'base64' in CodecReg 27 | True 28 | """ 29 | return encoding in self._codecs 30 | 31 | def __iter__(self): 32 | """Iterate over registered codec names. 33 | 34 | >>> CodecReg.reset() 35 | >>> {'hex', 'base32', 'base64'}.issubset(CodecReg) 36 | True 37 | """ 38 | return iter(self._codecs) 39 | 40 | 41 | class CodecReg(metaclass=_CodecRegMeta): 42 | """Registry of supported codecs.""" 43 | 44 | # Common codec data. 
45 | _common_codec_data = [ # (name, encode, decode) 46 | ('hex', binascii.b2a_hex, binascii.a2b_hex), 47 | ('base32', base64.b32encode, base64.b32decode), 48 | ('base64', base64.b64encode, base64.b64decode)] 49 | if base58: 50 | _common_codec_data.append( 51 | ('base58', lambda s: bytes(base58.b58encode(s), 'ascii'), base58.b58decode)) 52 | 53 | # Codec data: encoding and decoding functions (both from bytes to bytes). 54 | _codec = namedtuple('codec', 'encode decode') 55 | 56 | @classmethod 57 | def reset(cls): 58 | """Reset the registry to the standard codecs.""" 59 | cls._codecs = {} 60 | c = cls._codec 61 | for (name, encode, decode) in cls._common_codec_data: 62 | cls._codecs[name] = c(encode, decode) 63 | 64 | @classmethod 65 | def register(cls, name, encode, decode): 66 | """Add a codec to the registry. 67 | 68 | Registers a codec with the given `name` (a string) to be used with the 69 | given `encode` and `decode` functions, which take a `bytes` object and 70 | return another one. An existing codec is replaced. 71 | 72 | >>> import binascii 73 | >>> CodecReg.register('uu', binascii.b2a_uu, binascii.a2b_uu) 74 | >>> CodecReg.get_decoder('uu') is binascii.a2b_uu 75 | True 76 | >>> CodecReg.reset() 77 | >>> 'uu' in CodecReg 78 | False 79 | """ 80 | cls._codecs[name] = cls._codec(encode, decode) 81 | 82 | @classmethod 83 | def unregister(cls, name): 84 | """Remove a codec from the registry. 85 | 86 | Unregisters the codec with the given `name` (a string). If the codec 87 | is not registered, a `KeyError` is raised. 88 | 89 | >>> import binascii 90 | >>> CodecReg.register('uu', binascii.b2a_uu, binascii.a2b_uu) 91 | >>> 'uu' in CodecReg 92 | True 93 | >>> CodecReg.unregister('uu') 94 | >>> 'uu' in CodecReg 95 | False 96 | """ 97 | del cls._codecs[name] 98 | 99 | @classmethod 100 | def get_encoder(cls, encoding): 101 | r"""Return an encoder for the given `encoding`. 
102 | 103 | The encoder gets a `bytes` object as argument and returns another 104 | encoded `bytes` object. If the `encoding` is not registered, a 105 | `KeyError` is raised. 106 | 107 | >>> encode = CodecReg.get_encoder('hex') 108 | >>> encode(b'FOO\x00') 109 | b'464f4f00' 110 | """ 111 | return cls._codecs[encoding].encode 112 | 113 | @classmethod 114 | def get_decoder(cls, encoding): 115 | r"""Return a decoder for the given `encoding`. 116 | 117 | The decoder gets a `bytes` object as argument and returns another 118 | decoded `bytes` object. If the `encoding` is not registered, a 119 | `KeyError` is raised. 120 | 121 | >>> decode = CodecReg.get_decoder('hex') 122 | >>> decode(b'464f4f00') 123 | b'FOO\x00' 124 | """ 125 | return cls._codecs[encoding].decode 126 | 127 | # Initialize the codec registry. 128 | CodecReg.reset() 129 | -------------------------------------------------------------------------------- /visualization/multihash/funcs.py: -------------------------------------------------------------------------------- 1 | # pymultihash: Python implementation of the multihash specification 2 | # 3 | # Initial author: Ivan Vilata-i-Balaguer 4 | # License: MIT 5 | 6 | """Enumeration of standard multihash functions, and function registry""" 7 | 8 | from collections import namedtuple 9 | from enum import Enum 10 | from numbers import Integral 11 | 12 | # Import standard hashlib-compatible modules. 13 | import hashlib 14 | 15 | # Try to import known optional hashlib-compatible modules. 16 | try: 17 | import sha3 18 | except ImportError: 19 | sha3 = None 20 | try: 21 | import pyblake2 as blake2 22 | except ImportError: 23 | blake2 = None 24 | 25 | 26 | def _is_app_specific_func(code): 27 | """Is the given hash function integer `code` application-specific?""" 28 | return isinstance(code, Integral) and (0x00 <= code <= 0x0f) 29 | 30 | 31 | class Func(Enum): 32 | """An enumeration of hash functions supported by multihash. 
class FuncReg(metaclass=_FuncRegMeta):
    """Registry of supported hash functions.

    Standard multihash functions are identified by `Func` members, while
    application-specific functions are identified by their integer code
    (0x00-0x0f).  For every registered function the registry keeps its
    names (with hyphens and with underscores) and, optionally, the name and
    constructor of a hashlib-compatible implementation.

    All the state lives in class attributes which are (re)created by
    `reset()`, so the class is used directly and never instantiated.
    """

    # Standard hash function data.
    #
    # For SHA-3 and BLAKE2 the optional external module is preferred when it
    # could be imported; otherwise the constructor is looked up in `hashlib`
    # (which provides both families since Python 3.6).  `None` means that no
    # hashlib-compatible constructor is available for the function.
    _std_func_data = [  # (func, hash name, hash new)
        (Func.sha1, 'sha1', hashlib.sha1),

        (Func.sha2_256, 'sha256', hashlib.sha256),
        (Func.sha2_512, 'sha512', hashlib.sha512),

        (Func.sha3_512, 'sha3_512',
         getattr(sha3 or hashlib, 'sha3_512', None)),
        (Func.sha3_384, 'sha3_384',
         getattr(sha3 or hashlib, 'sha3_384', None)),
        (Func.sha3_256, 'sha3_256',
         getattr(sha3 or hashlib, 'sha3_256', None)),
        (Func.sha3_224, 'sha3_224',
         getattr(sha3 or hashlib, 'sha3_224', None)),

        # SHAKE objects require an explicit output length in `digest()`,
        # so they cannot be used through the plain hashlib interface.
        (Func.shake_128, 'shake_128', None),
        (Func.shake_256, 'shake_256', None),

        (Func.blake2b, 'blake2b',
         getattr(blake2 or hashlib, 'blake2b', None)),
        (Func.blake2s, 'blake2s',
         getattr(blake2 or hashlib, 'blake2s', None)),
    ]

    # Hashlib compatibility data for a hash: hash name (e.g. ``sha256`` for
    # SHA-256, ``sha2-256`` in multihash), and the corresponding constructor.
    _hash = namedtuple('hash', 'name new')

    @classmethod
    def reset(cls):
        """Reset the registry to the standard multihash functions.

        Any application-specific function registered so far is forgotten.
        """
        # Maps function names (hyphens or underscores) to registered functions.
        cls._func_from_name = {}

        # Maps hashlib names to registered functions.
        cls._func_from_hash = {}

        # Hashlib compatibility data by function.
        cls._func_hash = {}

        for (func, hash_name, hash_new) in cls._std_func_data:
            cls._do_register(func, func.name, hash_name, hash_new)
        # Internal consistency check: every `Func` member must have data.
        assert set(cls._func_hash) == set(Func)

    @classmethod
    def get(cls, func_hint):
        """Return a registered hash function matching the given hint.

        The hint may be a `Func` member, a function name (with hyphens or
        underscores), or its code.  A `Func` member is returned for standard
        multihash functions and an integer code for application-specific ones.
        If no matching function is registered, a `KeyError` is raised.

        >>> fm = FuncReg.get(Func.sha2_256)
        >>> fnu = FuncReg.get('sha2_256')
        >>> fnh = FuncReg.get('sha2-256')
        >>> fc = FuncReg.get(0x12)
        >>> fm == fnu == fnh == fc
        True
        """
        # Different possibilities of `func_hint`, most to least probable.
        try:  # `Func` member (or its value)
            return Func(func_hint)
        except ValueError:
            pass
        if func_hint in cls._func_from_name:  # `Func` member name, extended
            return cls._func_from_name[func_hint]
        if func_hint in cls._func_hash:  # registered app-specific code
            return func_hint
        raise KeyError("unknown hash function", func_hint)

    @classmethod
    def _do_register(cls, code, name, hash_name=None, hash_new=None):
        """Add hash function data to the registry without checks.

        The `name` is stored twice: once with only underscores and once with
        only hyphens.  The hashlib name mapping is only updated when a
        non-empty `hash_name` is given.
        """
        cls._func_from_name[name.replace('-', '_')] = code
        cls._func_from_name[name.replace('_', '-')] = code
        if hash_name:
            cls._func_from_hash[hash_name] = code
        cls._func_hash[code] = cls._hash(hash_name, hash_new)

    @classmethod
    def register(cls, code, name, hash_name=None, hash_new=None):
        """Add an application-specific function to the registry.

        Registers a function with the given `code` (an integer) and `name` (a
        string, which is added both with only hyphens and only underscores),
        as well as an optional `hash_name` and `hash_new` constructor for
        hashlib compatibility.  If the application-specific function is
        already registered, the related data is replaced.  Registering a
        function with a `code` not in the application-specific range
        (0x00-0x0f) or with names already registered for a different function
        raises a `ValueError`.

        >>> import hashlib
        >>> FuncReg.register(0x05, 'md-5', 'md5', hashlib.md5)
        >>> FuncReg.get('md-5') == FuncReg.get('md_5') == 0x05
        True
        >>> hashobj = FuncReg.hash_from_func(0x05)
        >>> hashobj.name == 'md5'
        True
        >>> FuncReg.func_from_hash(hashobj) == 0x05
        True
        >>> FuncReg.reset()
        >>> 0x05 in FuncReg
        False
        """
        if not _is_app_specific_func(code):
            raise ValueError(
                "only application-specific functions can be registered")
        # Check already registered names in the different mappings.  The
        # given spelling and both stored spellings of the function name
        # (hyphens only, underscores only) are checked, so that a name
        # mixing both separators cannot silently overwrite the entries of
        # another function.
        func_names = (
            name, name.replace('-', '_'), name.replace('_', '-'))
        name_mapping_data = [  # (mapping, name in mapping, error if existing)
            (cls._func_from_name, func_name,
             "function name is already registered for a different function")
            for func_name in func_names]
        name_mapping_data.append(
            (cls._func_from_hash, hash_name,
             "hashlib name is already registered for a different function"))
        for (mapping, nameinmap, errmsg) in name_mapping_data:
            existing_func = mapping.get(nameinmap, code)
            if existing_func != code:
                raise ValueError(errmsg, existing_func)
        # Unregister if existing to ensure no orphan entries.
        if code in cls._func_hash:
            cls.unregister(code)
        # Proceed to registration.
        cls._do_register(code, name, hash_name, hash_new)

    @classmethod
    def unregister(cls, code):
        """Remove an application-specific function from the registry.

        Unregisters the function with the given `code` (an integer).  If the
        function is not registered, a `KeyError` is raised.  Unregistering a
        function with a `code` not in the application-specific range
        (0x00-0x0f) raises a `ValueError`.

        >>> import hashlib
        >>> FuncReg.register(0x05, 'md-5', 'md5', hashlib.md5)
        >>> FuncReg.get('md-5')
        5
        >>> FuncReg.unregister(0x05)
        >>> FuncReg.get('md-5')
        Traceback (most recent call last):
            ...
        KeyError: ('unknown hash function', 'md-5')
        """
        # Do not use ``code in Func`` here: testing an integer for
        # membership in an enumeration raises `TypeError` in Python 3.8 to
        # 3.11, which made every call with a valid code fail.
        if not _is_app_specific_func(code):
            raise ValueError(
                "only application-specific functions can be unregistered")
        # Remove hashlib data first, so that an unregistered code raises
        # `KeyError` before any other mapping is touched.
        hash_data = cls._func_hash.pop(code)
        # Remove mapping to function by name.
        func_names = {n for (n, f) in cls._func_from_name.items() if f == code}
        for func_name in func_names:
            del cls._func_from_name[func_name]
        # Remove mapping to hash.
        if hash_data.name:
            del cls._func_from_hash[hash_data.name]

    @classmethod
    def func_from_hash(cls, hash):
        """Return the multihash `Func` for the hashlib-compatible `hash` object.

        The lookup uses the ``name`` attribute of the `hash` object.  If no
        `Func` is registered for the given hash, a `KeyError` is raised.

        >>> import hashlib
        >>> h = hashlib.sha256()
        >>> f = FuncReg.func_from_hash(h)
        >>> f is Func.sha2_256
        True
        """
        return cls._func_from_hash[hash.name]

    @classmethod
    def hash_from_func(cls, func):
        """Return a hashlib-compatible object for the multihash `func`.

        If the `func` is registered but no hashlib-compatible constructor is
        available for it, `None` is returned.  If the `func` is not
        registered, a `KeyError` is raised.

        >>> h = FuncReg.hash_from_func(Func.sha2_256)
        >>> h.name
        'sha256'
        """
        new = cls._func_hash[func].new
        return new() if new else None
class Multihash(namedtuple('Multihash', 'func digest')):
    """A named tuple representing a multihash function and digest.

    The hash function is usually a `Func` member.

    >>> mh = Multihash(Func.sha1, b'BINARY_DIGEST')
    >>> mh == (Func.sha1, b'BINARY_DIGEST')
    True
    >>> mh == (mh.func, mh.digest)
    True

    However it can also be its integer value (the function code) or its string
    name (the function name, with either underscore or hyphen).

    >>> mhfc = Multihash(Func.sha1.value, mh.digest)
    >>> mhfc == mh
    True
    >>> mhfn = Multihash('sha2-256', b'...')
    >>> mhfn.func is Func.sha2_256
    True

    Application-specific codes (0x00-0x0f) are also accepted.  Other codes
    raise a `KeyError`.

    >>> mhfc = Multihash(0x01, b'...')
    >>> mhfc.func
    1
    >>> mhfc = Multihash(1234, b'...')
    Traceback (most recent call last):
        ...
    KeyError: ('unknown hash function', 1234)
    """
    __slots__ = ()

    def __new__(cls, func, digest):
        """Create the multihash, normalizing `func` and `digest`.

        The `func` hint is resolved through `FuncReg.get()`; the `digest` is
        converted to `bytes`.  A `KeyError` is raised for unknown functions
        which are not application-specific codes.
        """
        try:
            func = FuncReg.get(func)
        except KeyError:
            if not _is_app_specific_func(func):
                raise
            # Application-specific function codes
            # are allowed even if not registered.
            func = int(func)
        digest = bytes(digest)
        # Zero-argument `super()` resolves relative to this class, so it
        # also works when `cls` is a subclass (the previous
        # ``super(cls, Multihash)`` had its arguments swapped and raised
        # `TypeError` for subclasses).
        return super().__new__(cls, func, digest)

    @classmethod
    def from_hash(cls, hash):
        """Create a `Multihash` from a hashlib-compatible `hash` object.

        >>> import hashlib
        >>> data = b'foo'
        >>> hash = hashlib.sha1(data)
        >>> digest = hash.digest()
        >>> mh = Multihash.from_hash(hash)
        >>> mh == (Func.sha1, digest)
        True

        Application-specific hash functions are also supported (see
        `FuncReg`).

        If there is no matching multihash hash function for the given `hash`,
        a `ValueError` is raised.
        """
        try:
            func = FuncReg.func_from_hash(hash)
        except KeyError as ke:
            raise ValueError(
                "no matching multihash function", hash.name) from ke
        # Build through `cls` so that subclasses get their own instances.
        return cls(func, hash.digest())

    def __str__(self):
        """Return a compact string representation of the multihash.

        The representation includes the name of the standard multihash
        function or the hexadecimal code of the application-specific one, and
        a Base64-encoded version of the raw digest.  This is *not* the
        complete multihash-encoded digest that can be obtained with
        `Multihash.encode()`.

        >>> mh = Multihash(Func.sha1, b'TEST')
        >>> print(mh)
        Multihash(sha1, b64:VEVTVA==)
        >>> mh = Multihash(0x01, b'TEST')
        >>> print(mh)
        Multihash(0x1, b64:VEVTVA==)
        """
        # Use `isinstance()` instead of ``self.func in Func``: testing an
        # integer for enum membership raises `TypeError` in Python 3.8-3.11.
        func = self.func
        func_repr = func.name if isinstance(func, Func) else hex(func)
        return 'Multihash({func}, b64:{digest})'.format(
            func=func_repr,
            digest=base64.b64encode(self.digest).decode()
        )

    def encode(self, encoding=None):
        r"""Encode into a multihash-encoded digest.

        If `encoding` is `None`, a binary digest is produced:

        >>> mh = Multihash(0x01, b'TEST')
        >>> mh.encode()
        b'\x01\x04TEST'

        If the name of an `encoding` is specified, it is used to encode the
        binary digest before returning it (see `CodecReg` for supported
        codecs).

        >>> mh.encode('base64')
        b'AQRURVNU'

        If the `encoding` is not available, a `KeyError` is raised.
        """
        try:
            fc = self.func.value
        except AttributeError:  # application-specific function code
            fc = self.func
        # One byte for the function code, one for the digest length.
        mhash = bytes([fc, len(self.digest)]) + self.digest
        if encoding:
            mhash = CodecReg.get_encoder(encoding)(mhash)
        return mhash

    def verify(self, data):
        r"""Does the given `data` hash to the digest in this `Multihash`?

        The digest of `data` is truncated to the length of the stored digest
        before comparing, so truncated multihashes can be verified too.

        >>> import hashlib
        >>> data = b'foo'
        >>> hash = hashlib.sha1(data)
        >>> mh = Multihash.from_hash(hash)
        >>> mh.verify(data)
        True
        >>> mh.verify(b'foobar')
        False

        Application-specific hash functions are also supported (see
        `FuncReg`).
        """
        digest = _do_digest(data, self.func)
        return digest[:len(self.digest)] == self.digest

    def truncate(self, length):
        """Return a new `Multihash` with a shorter digest `length`.

        If the given `length` is greater than the original, a `ValueError`
        is raised.

        >>> mh1 = Multihash(0x01, b'FOOBAR')
        >>> mh2 = mh1.truncate(3)
        >>> mh2 == (0x01, b'FOO')
        True
        >>> mh3 = mh1.truncate(10)
        Traceback (most recent call last):
            ...
        ValueError: cannot enlarge the original digest by 4 bytes
        """
        if length > len(self.digest):
            raise ValueError("cannot enlarge the original digest by %d bytes"
                             % (length - len(self.digest)))
        return self.__class__(self.func, self.digest[:length])
def shorten(str):
    """Replace a peer ID token with a short ``<hex>`` tag.

    Tokens starting with ``Qm`` or ``12D3Koo`` are treated as base-58 peer
    IDs: a single trailing colon is dropped and the remainder is passed to
    `get_hex()`, whose result is wrapped in angle brackets.  Any other token
    is returned unchanged.
    """
    if not str.startswith(('Qm', '12D3Koo')):
        return str
    peer_id = str[:-1] if str.endswith(':') else str
    return '<%s>' % get_hex(peer_id)