└── visualization
    ├── README.md
    ├── graph.gpi
    ├── hex2mult.py
    ├── log2plot.py
    ├── mult2hex.py
    ├── mult2kad.py
    ├── multihash
        ├── __init__.py
        ├── codecs.py
        ├── funcs.py
        ├── multihash.py
        └── version.py
    ├── plot
    └── shrink.py


/visualization/README.md:
--------------------------------------------------------------------------------
 1 | # Visualization of IPFS DHT queries
 2 | These scripts enable the visualization of the connections timeline during an IPFS DHT query.
 3 | 
## Usage instructions
 5 | 
 6 | ### Step 1. Start the IPFS daemon on your machine
 7 | 
 8 | Assuming you have already installed IPFS, you start the IPFS daemon by executing:
 9 | 
10 | ```bash
11 | ipfs daemon
12 | ```
13 | 
14 | ### Step 2. Run an IPFS DHT query
15 | 
16 | To run an IPFS DHT query, you normally execute:
17 | 
18 | ```bash
19 | ipfs dht query <CID>
20 | ```
21 | 
22 | In order to visualize the connections opened and used during a query, you should also provide the verbose flag (-v) and redirect the standard output into a file, say query.log. Here's an example, including a sample CID:
23 | 
24 | ```bash
25 | ipfs dht query -v QmefYbmED9E1cw3NEqtxKDmVrzk3Z351ZDgwzwKdQ4Ajbj > query.log
26 | ```
27 | 
28 | 
29 | 
30 | ### Step 3. Parse the query log
31 | 
32 | To parse the log produced in the previous step, you should execute:
33 | 
34 | ```bash
35 | cat query.log | ./log2plot.py > DATA
36 | ```
37 | 
38 | This will parse the query log, and will output visualization data to be consumed by gnuplot. We redirect that output into a file called DATA, which the gnuplot script expects to find.
39 | 
40 | ### Step 4. Visualize the data
41 | 
42 | At this final step, you execute [gnuplot](http://www.gnuplot.info/) to produce the visualization. You execute:
43 | 
44 | ```bash
45 | gnuplot graph.gpi
46 | ```
47 | 
48 | This will pop up a window with the timeline of connections that took place during the query.
49 | 
50 | You may optionally click on the top left button in the visualization window to export the plot as a PDF or image.
51 | 
52 | 
53 | 


--------------------------------------------------------------------------------
/visualization/graph.gpi:
--------------------------------------------------------------------------------
 1 | ###################################################
 2 | #                                                 #
 3 | # Written by Spyros Voulgaris (voulgaris@aueb.gr) #
 4 | # April 2021                                      #
 5 | #                                                 #
 6 | # Mobile Multimedia Lab                           #
 7 | # Athens University of Economics and Business     #
 8 | # Athens, Greece                                  #
 9 | #                                                 #
10 | ###################################################
11 | 
12 | #set term png size 1920,1200
13 | #set term png size 1600,900
14 | #set terminal pdfcairo enhanced size 32cm,18cm font ",8"
15 | #set terminal pdfcairo enhanced font ",8"
16 | #set term postscript eps enhanced color 13
17 | #set size 1.6,1.2
18 | #set size 2,2
# Interactive Qt window; the commented-out terminals above are alternatives
# for writing the plot to a file instead.
set term qt

# Small numeric helpers used by the plot expressions below.
min(a,b) = a<b ? a : b
max(a,b) = a>b ? a : b
max3(a,b,c) = max(max(a,b),max(a,c))

set key opaque

set xlabel "time (sec)"

# x axis: seconds since the first timestamp of the log.
# y axis: one row per peer (hop number), reversed so hop 1 is drawn at the top.
set xrange [0:*]
set yrange [] reverse
set xtics 1
set ytics 1
set grid

# "DATA" is the output of log2plot.py; its data sets are selected with `index`:
#   index 0: one row per peer -- 1:hop 2:dial_start 3:dial_end 4:dial_error
#            5:query_start 6:query_end 7:query_unfinished 8:peer_hash 9:target
#   index 1: causality arrows -- 1:timeA 2:peerA 3:timeB 4:peerB
#   index 2: times of 'context canceled' events
#   index 3: times at which target peers were reported
# log2plot.py writes -1 for an event that was never recorded for a peer.
plot "DATA" index 0 using 2:1:($3-$2):(0) with vectors nohead lw 2 lc 4 title "dial", \
     "DATA" index 0 using 4:1 with points pt 2 ps 1.5 lw 2 lc 7 title "dial error", \
     "DATA" index 0 using (min($5,$6)):1:($6-$5):(0) with vectors nohead lw 6 lc 3 title "query", \
     "DATA" index 0 using (min($5,$7)):1:($7-$5):(0):5 with vectors nohead lw 6 lc rgb '#A056B4E9' title "unfinished query", \
     "DATA" index 0 using 2:1 with points pt 7 ps 0.6 lc 4 notitle, \
     "DATA" index 1 using 1:2:($3-$1):($4-$2) with vectors filled lc '#20C7162B' lw 1.5 dt 2 title "causality", \
     "DATA" index 1 using 1:2 with points lc '#20C7162B' pt 7 ps 0.6 notitle, \
     "DATA" index 0 using (max3($3,$6,$7)):1:(sprintf("  %s",stringcolumn(8))):($9==0?8:2) with labels textcolor variable left notitle, \
     "DATA" index 2 using 1:(0):(0):2 with vectors nohead lw 3 dt 4 lc 8 title "context canceled", \
     "DATA" index 3 using 1:(0):(0):2 with vectors nohead lw 3 dt 4 lc 2 title "results finalized"

# Keep the plot window open until Enter is pressed in the terminal.
pause -1
47 | 


--------------------------------------------------------------------------------
/visualization/hex2mult.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | 
 3 | import os
 4 | import sys
 5 | import multihash
 6 | import hashlib
 7 | from typing import Union
 8 | import base58
 9 | 
10 | 
def printb(bytes_array):
  """Print the given byte values as one line of two-digit lowercase hex."""
  hex_digits = [f'{value:02x}' for value in bytes_array]
  print(''.join(hex_digits))
13 | 
14 | 
def hex_to_multihash(hexString: str) -> str:
  """Convert a hex-encoded digest into a base-58 multihash string.

  The digest is left-padded with zero bytes up to 32 bytes and prefixed with
  the two header bytes 18 (0x12) and 32 (0x20) before being base-58 encoded.
  """
  digest = bytearray.fromhex(hexString).rjust(32, b'\0')
  header = bytes([18, 32])
  encoded = base58.b58encode(bytes(header + digest))
  return encoded.decode('utf-8')
26 | 
27 | 
# Convert every stdin line (one hex digest per line) and print the result.
for raw_line in sys.stdin.readlines():
  print(hex_to_multihash(raw_line.strip()))
33 | 
34 | 


--------------------------------------------------------------------------------
/visualization/log2plot.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | 
  3 | ###################################################
  4 | #                                                 #
  5 | # Written by Spyros Voulgaris (voulgaris@aueb.gr) #
  6 | # April 2021                                      #
  7 | #                                                 #
  8 | # Mobile Multimedia Lab                           #
  9 | # Athens University of Economics and Business     #
 10 | # Athens, Greece                                  #
 11 | #                                                 #
 12 | ###################################################
 13 | 
 14 | import sys
 15 | import re
 16 | from datetime import datetime
 17 | from datetime import timedelta
 18 | import collections
 19 | import base58
 20 | import hashlib
 21 | 
 22 | 
 23 | 
 24 | 
 25 | ####
 26 | # Global variables
 27 | ####
 28 | 
 29 | ID_LEN = 6
 30 | 
 31 | requests = collections.defaultdict(dict)
 32 | order = []
 33 | causality = []
 34 | 
 35 | startTime=None
 36 | endTime=None
 37 | lastResponse=None
 38 | 
 39 | context_canceled = collections.defaultdict(int)
 40 | 
 41 | target_determined = collections.defaultdict(int)
 42 | 
 43 | unmatched_lines = []
 44 | 
 45 | 
 46 | 
 47 | ####
 48 | # Function that converts a multihash to its hex representation
 49 | ####
 50 | 
 51 | def get_hex(b58_encoded_peer_id_str: str) -> str:
 52 |   """Converts base-58 multihash to hex representation"""
 53 |   bytes = base58.b58decode(b58_encoded_peer_id_str)
 54 |   sha256 = hashlib.sha256(bytes).digest()
 55 |   return sha256.hex()
 56 | 
 57 | 
 58 | 
 59 | ####
 60 | # The handle_<EVENT> functions are called by the main loop when
 61 | # a line matching the respective event's regex is found.
 62 | ####
 63 | 
 64 | def handle_querying(time, match):
 65 |     peer = get_hex(match.group(2))
 66 | 
 67 |     if peer not in order:
 68 |       order.append(peer)
 69 | 
 70 |     requests[peer]['query_start'] = time
 71 | 
 72 |     # set query_end to this time too, in case it never ends
 73 |     #requests[peer]['query_end'] = time
 74 | 
 75 |     # if a dialing had started, record that it just ended
 76 |     if 'dial_start' in requests[peer]:
 77 |       requests[peer]['dial_end'] = time
 78 | 
 79 | def handle_dialing(time, match):
 80 |     global lastResponse
 81 |     peer = get_hex(match.group(2))
 82 | 
 83 |     if peer not in order:
 84 |       order.append(peer)
 85 | 
 86 |     requests[peer]['dial_start'] = time
 87 | 
 88 |     # set dial_end to this time too, in case it never ends
 89 |     requests[peer]['dial_end'] = time
 90 | 
 91 |     if lastResponse!=None:
 92 |       causality.append( (lastResponse[0], lastResponse[1], peer, time) )
 93 | 
 94 | def handle_says_use(time, match):
 95 |     global lastResponse
 96 |     peer = get_hex(match.group(2))
 97 |     requests[peer]['query_end'] = time
 98 |     lastResponse = (peer, time)
 99 | 
def handle_dial_error(time, match):
    """Record a failed dial to the peer named in `match` at `time`.

    The failure both ends the dial and becomes the most recent response.
    """
    global lastResponse
    peer = get_hex(match.group(2))
    requests[peer].update(dial_error=time, dial_end=time)
    lastResponse = (peer, time)
106 | 
def handle_context_canceled(time, match):
    """Count one more 'context canceled' line at `time` (`match` is unused)."""
    context_canceled[time] = context_canceled[time] + 1
109 | 
def handle_target_found(time, match):
    """Mark the peer named in `match` as a reported target at `time`.

    Also counts the report for that time and moves `endTime` to it.
    """
    global endTime
    peer = get_hex(match.group(2))
    requests[peer]['in_targets'] = True
    target_determined[time] += 1
    endTime = time
116 | 
117 | 
118 | 
119 | ####
# The following function returns the 'relative time', i.e.,
# the time in seconds since the beginning of this query,
# at which 'event_type' occurred for this 'peer'.
123 | ####
124 | 
def relative_time(peer, event_type):
  """Return the seconds between `startTime` and `event_type` of `peer`.

  When no such event was recorded for the peer, the result is -1.0.
  """
  # One second before the start, so that a missing event yields -1.
  fallback = startTime - timedelta(seconds=1)
  event_time = requests[peer].get(event_type, fallback)
  return (event_time - startTime).total_seconds()
133 | 
134 | 
135 | 
136 | ####
137 | # REGULAR EXPRESSIONS
138 | #
139 | # The following statements define the regular expressions
140 | # to use for parsing logs.
141 | #
# The regex dict associates each event type with a tuple <regex,func>,
# containing the regex to detect that event and the handler function.
144 | ####
145 | 
146 | # Time format
# Time format (HH:MM:SS.mmm). Raw strings keep the backslashes intact for the
# regex engine, and the dot is escaped so that only a literal '.' matches --
# which is also the only separator the '%H:%M:%S.%f' parsing accepts.
re_time = r'(\d\d:\d\d:\d\d\.\d\d\d)'

# Multihashes
re_Qm = r'Qm\w{44}'  # Qm multihash
re_12D3KooW = r'12D3KooW\w{44}'  # 12D3KooW multihash
re_multihash = '(' + re_Qm + '|' + re_12D3KooW + ')'

# Lines: event name -> (pattern, handler). Every pattern starts with the
# timestamp (group 1) followed by the peer the line is about (group 2).
# The main loop tries the patterns in insertion order.
regex = {
    'querying': (re_time + r': \* querying ' + re_multihash, handle_querying),
    'dialing': (re_time + r': dialing peer: ' + re_multihash, handle_dialing),
    'says_use': (re_time + r': \* ' + re_multihash + r' says use(?: ' + re_multihash + r')*', handle_says_use),
    'dial_error': (re_time + r': error: failed to dial ' + re_multihash + r': all dials failed', handle_dial_error),
    'context_canceled': (re_time + r': error: context canceled$', handle_context_canceled),
    'target_found': (re_time + r': ' + re_multihash, handle_target_found),
}
162 | 
163 | 
164 | 
165 | 
166 | 
167 | ####
168 | # The main loop of the parser, iterating through stdin lines, one at a time.
# For each line, it first tries to match a time regex at the beginning.
170 | # Then, it loops through all patterns defined in the regex dict, and if
171 | # one is found, it calls the respective handler function.
172 | ####
173 | 
lineNum = 0

# No timestamp has been seen yet. Without this initial value, a log whose
# first line carries no timestamp would raise NameError below; the output
# stage already expects None for such lines.
time = None

for lineNum, line in enumerate(sys.stdin, start=1):
  line = line.strip()

  # First, parse time, which is common for most lines.
  # If a line does not report a time keep the previous time,
  # as it probably refers to the same event.
  match = re.match(re_time, line)
  if match:
    time = datetime.strptime(match.group(1), '%H:%M:%S.%f')
    if startTime is None:
      startTime = time

  # Dispatch to the handler of the first event pattern that matches;
  # lines matching none of them are kept for the final report.
  for pattern, handler in regex.values():
    m = re.match(pattern, line)
    if m:
      handler(time, m)
      break
  else:
    unmatched_lines.append((lineNum, time, line))
202 | 
203 | 
204 | 
205 | 
206 | 
207 | ####
208 | # Parsing is complete. Let's output the results!
209 | #
210 | # First output for each peer contacted the times for starting/ending
211 | # the respective dialing and querying,as well as whether there was
212 | # an error with dialing, and whether that peer was eventually
213 | # among the set of K peers selected as targets.
214 | ####
215 | 
# peer -> 1-based hop number, used later to place the causality arrows.
peerIndex = {}
numRequests = len(order)

# Header names all nine columns of the rows below (the original header
# omitted the 'query_unf' column, shifting the last two names).
print('#hop\tdial_st\tdial_end\tdial_err\tquery_st\tquery_end\tquery_unf\tpeer_hash\ttarget')

for hop, peer in enumerate(order, start=1):

  # Each value is in seconds since startTime, or -1 if never recorded.
  dial_start = relative_time(peer, 'dial_start')
  dial_end = relative_time(peer, 'dial_end')
  dial_error = relative_time(peer, 'dial_error')

  query_start = relative_time(peer, 'query_start')
  query_end = relative_time(peer, 'query_end')

  in_targets = int('in_targets' in requests[peer])

  # A query that started but never ended is reported as running until endTime.
  if query_start > query_end and endTime is not None:
    query_unfinished = (endTime - startTime).total_seconds()
  else:
    query_unfinished = -1

  print('%d\t%.2f\t%.2f\t%.2f\t%.2f\t%.2f\t%.2f\t%s\t%d' % (hop, dial_start, dial_end, dial_error, query_start, query_end, query_unfinished, peer[0:ID_LEN], in_targets))

  peerIndex[peer] = hop
240 | 
241 | 
242 | 
243 | 
244 | ####
245 | # Output the causality relations between events,
246 | # namely providing coordinates for the respective red arrows in the plot.
247 | #
248 | # The causality relations are determined in a best effort way,
249 | # which is *not* error-proof.
250 | ####
251 | 
print('\n\n#Causality')
print('#timeA\tpeerA\ttimeB\tpeerB')

for (peer1, time1, peer2, time2) in causality:
  offset1 = (time1 - startTime).total_seconds()
  offset2 = (time2 - startTime).total_seconds()

  # Peers without a row fall back to the first (0) / last (numRequests) edge.
  index1 = peerIndex.get(peer1, 0)
  index2 = peerIndex.get(peer2, numRequests)

  print('%f\t%d\t%f\t%d' % (offset1, index1, offset2, index2))
270 | 
271 | 
272 | 
273 | 
274 | ####
# Output the times when 'context canceled' events occurred.
276 | # These are plotted as thick vertical dashed lines in black.
277 | ####
278 | 
print('\n\n#Context canceled')
print('#time\tnumPeers\tlabel')

# One row per distinct time: relative time, line height, number of events.
for when, count in context_canceled.items():
  rel_time = (when - startTime).total_seconds()
  print('%.2f\t%.2f\t%d' % (rel_time, len(order)+1, count))
286 | 
287 | 
288 | 
289 | 
290 | ####
291 | # Output the times when peers are reported as selected targets.
292 | # These are plotted as thick vertical dashed lines in green.
293 | ####
294 | 
print('\n\n#Time(s) when the K closest peers to the target ID were determined')
print('#time\tnumPeers\tlabel')

# One row per distinct time: relative time, line height, number of peers.
for when, count in target_determined.items():
  rel_time = (when - startTime).total_seconds()
  print('%.2f\t%.2f\t%d' % (rel_time, len(order)+1, count))
302 | 
303 | 
304 | 
305 | ####
306 | # Finally, output the list of lines that have *not* matched any
307 | # regex pattern during parsing.
308 | #
309 | # These lines are not included in the plots, but they are very useful
310 | # when trying to interpret the logs in full detail, and to see
311 | # what has been left out in special cases, for future improvements
312 | # of this parser.
313 | ####
314 | 
print('\n\n#Lines ignored while parsing')

for (num, when, text) in unmatched_lines:
  # The relative time is shown only when a timestamp was known for the line.
  if when is not None:
    suffix = ' (time: %.2f)' % (when - startTime).total_seconds()
  else:
    suffix = ''
  print('%d%s: %s' % (num, suffix, text))
324 | 


--------------------------------------------------------------------------------
/visualization/mult2hex.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | 
 3 | import os
 4 | import sys
 5 | import multihash
 6 | import hashlib
 7 | from typing import Union
 8 | import base58
 9 | 
10 | 
def printb(bytes_array):
  """Return the given byte values as a string of two-digit lowercase hex."""
  return ''.join([f'{value:02x}' for value in bytes_array])
13 | 
14 | 
def multihash_to_hex(b58_encoded_peer_id_str: str) -> str:
  """Return the hex form of a base-58 multihash without its first two bytes."""
  decoded = base58.b58decode(b58_encoded_peer_id_str)
  # Drop the two leading header bytes and keep the remainder.
  digest = decoded[2:]
  return printb(digest)
21 | 
22 | 
# Convert every stdin line (one base-58 multihash per line) and print the result.
for raw_line in sys.stdin.readlines():
  print(multihash_to_hex(raw_line.strip()))
28 | 
29 | 


--------------------------------------------------------------------------------
/visualization/mult2kad.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | 
 3 | import os
 4 | import sys
 5 | import multihash
 6 | import hashlib
 7 | from typing import Union
 8 | import base58
 9 | 
10 | 
11 | 
def multihash_to_kademlia(b58_encoded_peer_id_str: str) -> str:
  """Return the hex SHA-256 digest of the base-58 decoded multihash."""
  raw_id = base58.b58decode(b58_encoded_peer_id_str)
  return hashlib.sha256(raw_id).hexdigest()
17 | 
18 | 
# Convert every stdin line (one base-58 multihash per line) and print the result.
for raw_line in sys.stdin.readlines():
  print(multihash_to_kademlia(raw_line.strip()))
24 | 
25 | 


--------------------------------------------------------------------------------
/visualization/multihash/__init__.py:
--------------------------------------------------------------------------------
  1 | # pymultihash: Python implementation of the multihash specification
  2 | #
  3 | # Initial author: Ivan Vilata-i-Balaguer
  4 | # License: MIT
  5 | 
  6 | """Python implementation of the multihash specification
  7 | 
  8 | This is an implementation of the `multihash`_ specification in Python.
  9 | The main component in the module is the `Multihash` class, a named tuple that
 10 | represents a hash function and a digest created with it, with extended
 11 | abilities to work with hashlib-compatible hash functions, verify the integrity
 12 | of data, and encode itself to a byte string in the binary format described in
 13 | the specification (possibly ASCII-encoded).  The `decode()` function can be
 14 | used for the inverse operation, i.e. converting a (possibly ASCII-encoded)
 15 | byte string into a `Multihash` object.
 16 | 
 17 | .. _multihash: https://github.com/jbenet/multihash
 18 | 
 19 | Basic usage
 20 | ===========
 21 | 
 22 | Decoding
 23 | --------
 24 | 
 25 | One of the basic cases happens when you have a multihash-encoded digest like:
 26 | 
 27 | >>> mhash = b'EiAsJrRraP/Gj/mbRTwdMEE0E0ItcGSDv6D5il6IYmbnrg=='
 28 | 
 29 | You know beforehand that the multihash is Base64-encoded.  You also have some
 30 | data and you want to check if it matches that digest:
 31 | 
 32 | >>> data = b'foo'
 33 | 
 34 | To perform this check, you may first *decode* the multihash (i.e. parse it)
 35 | into a `Multihash` object, which provides the ``verify()`` method to validate
 36 | the given byte string against the encoded digest:
 37 | 
 38 | >>> import multihash
 39 | >>> mh = multihash.decode(mhash, 'base64')
 40 | >>> mh.verify(data)
 41 | True
 42 | 
 43 | Please note that you needed to specify that the multihash is Base64-encoded,
 44 | otherwise binary encoding is assumed (and the decoding will probably fail).
 45 | The verification internally uses a hashlib-compatible implementation of the
 46 | function indicated by the encoded multihash to check the data.  Read more
 47 | about codecs and hash functions further below.
 48 | 
 49 | The function in a `Multihash` object is stored as a member of the `Func`
 50 | enumeration, which contains one member per function listed in the `multihash`_
 51 | specification.  The name of a `Func` member is the name of that function in
 52 | the specification (with hyphens replaced by underscores), and its value is the
 53 | function code.  The `Multihash` object also contains the binary string with
 54 | the raw hash digest.  Application-specific hash functions are also supported,
 55 | but their numeric code is used instead of a `Func` member.
 56 | 
 57 | >>> mh  # doctest: +ELLIPSIS
 58 | Multihash(func=<Func.sha2_256: 18>, digest=b'...')
 59 | >>> hex(mh.func.value)
 60 | '0x12'
 61 | >>> len(mh.digest)
 62 | 32
 63 | 
 64 | The short representation of a `Multihash` object only shows the function name
 65 | (or its code if application-specific), and the Base64-encoded version of the
 66 | raw hash digest:
 67 | 
 68 | >>> print(mh)
 69 | Multihash(sha2_256, b64:LCa0a2j/xo/5m0U8HTBBNBNCLXBkg7+g+YpeiGJm564=)
 70 | 
 71 | If you need a shorter multihash, you may truncate it while keeping the initial
 72 | bytes of the raw hash digest.  A byte string validates against a truncated
 73 | multihash if its digest bytes match the initial bytes of the string's hash:
 74 | 
 75 | >>> mh_trunc = mh.truncate(16)
 76 | >>> print(mh_trunc)
 77 | Multihash(sha2_256, b64:LCa0a2j/xo/5m0U8HTBBNA==)
 78 | >>> mh_trunc.verify(data)
 79 | True
 80 | 
 81 | Encoding
 82 | --------
 83 | 
 84 | Now imagine that you have some data and you want to create a multihash out of
 85 | it.  First you must create a `Multihash` instance with the desired function
 86 | and the computed binary digest.  If you already know them, you may create the
 87 | `Multihash` instance directly:
 88 | 
 89 | >>> mh = multihash.Multihash(multihash.Func.sha2_512, b'...')
 90 | >>> print(mh)  # doctest: +ELLIPSIS
 91 | Multihash(sha2_512, b64:...)
 92 | 
Instead of the `Func` member, you may find it more comfortable to use the
 94 | function name (e.g. ``'sha2-512'`` or ``'sha2_512'``) or its code (e.g. ``19``
 95 | or ``0x13``).  Or you may create `Multihash` instances straight from
 96 | hashlib-compatible objects:
 97 | 
 98 | >>> import hashlib
 99 | >>> hash = hashlib.sha1(data)
100 | >>> mh = Multihash.from_hash(hash)
101 | >>> print(mh)
102 | Multihash(sha1, b64:C+7Hteo/D9vJXQ3UfzxbwnXaijM=)
103 | 
104 | However the easiest way to get a `Multihash` instance is with the `digest()`
105 | function, which internally uses a hashlib-compatible implementation of the
106 | indicated function to do the job for you:
107 | 
108 | >>> mh = multihash.digest(data, 'sha1')
109 | >>> print(mh)
110 | Multihash(sha1, b64:C+7Hteo/D9vJXQ3UfzxbwnXaijM=)
111 | 
112 | In any case, getting the multihash-encoded digest is very simple:
113 | 
114 | >>> mh.encode('base64')
115 | b'ERQL7se16j8P28ldDdR/PFvCddqKMw=='
116 | 
117 | As before, an encoding (Base64) was specified to avoid getting the binary
118 | version of the multihash.
119 | 
120 | The hash function registry
121 | ==========================
122 | 
123 | As the multihash specification indicates, you may use hash function codes in
124 | the range 0x00-0x0f to specify application-specific hash functions.
125 | The `decode()` function allows such multihashes, and the `Multihash`
126 | constructor allows specifying such hash functions by their integer code:
127 | 
128 | >>> import multihash
129 | >>> import hashlib
130 | >>> data = b'foo'
131 | >>> mh = multihash.Multihash(0x05, hashlib.md5(data).digest())
132 | >>> print(mh)  # doctest: +ELLIPSIS
133 | Multihash(0x5, b64:rL0Y20zC+Fzt72VPzMSk2A==)
134 | 
135 | However this does not allow using more intuitive strings instead of numbers
136 | for application-specific functions, and digesting or verifying with such a
137 | function is not possible:
138 | 
139 | >>> multihash.digest(data, 'md5')
140 | Traceback (most recent call last):
141 |     ...
142 | KeyError: ('unknown hash function', 'md5')
143 | >>> mh.verify(data)
144 | Traceback (most recent call last):
145 |     ...
146 | KeyError: ('unknown hash function', 5)
147 | 
148 | The `FuncReg` class helps work around these problems by providing a registry
149 | of hash functions.  You may add your application-specific hash functions there
150 | with a code, a name, and optionally a name and a callable object for
151 | hashlib-compatible operations:
152 | 
153 | >>> multihash.FuncReg.register(0x05, 'md-5', 'md5', hashlib.md5)
154 | >>> multihash.digest(data, 'md-5')  # doctest: +ELLIPSIS
155 | Multihash(func=5, digest=b'...')
156 | >>> mh.verify(data)
157 | True
158 | 
159 | You may remove your application-specific functions from the registry as well:
160 | 
161 | >>> multihash.FuncReg.unregister(0x05)
162 | 
163 | `FuncReg` also allows you to iterate over registered functions (as `Func`
164 | members or function codes), and check if it contains a given function
165 | (i.e. whether the `Func` or code is registered or not).
166 | 
167 | >>> [f.name for f in multihash.FuncReg if f == multihash.Func.sha3]
168 | ['sha3_512']
169 | >>> 0x05 in multihash.FuncReg
170 | False
171 | 
172 | The codec registry
173 | ==================
174 | 
175 | Although a multihash is properly a binary packing format for a hash digest, it
176 | is not normally exchanged in binary form, but in some ASCII-encoded
177 | representation of it.  As seen above, multihash decoding and encoding calls
178 | support an ``encoding`` argument to allow ASCII decoding or encoding for
179 | your convenience.
180 | 
181 | The encodings mentioned in the multihash standard are already enabled and
182 | available by using their name (a string) as the ``encoding`` argument.
The ``base58`` encoding requires the ``base58`` package to be
installed, though.
185 | 
186 | The ``CodecReg`` class allows you to access the available codecs and register
187 | your own ones (or replace existing ones) with a name and encoding and decoding
188 | callables that get and return byte strings.  For instance, to add the uuencode
189 | codec:
190 | 
191 | >>> import multihash
192 | >>> import binascii
193 | >>> multihash.CodecReg.register('uu', binascii.b2a_uu, binascii.a2b_uu)
194 | 
195 | To use it:
196 | 
197 | >>> mhash = b'6$10+[L>UZC\\\\/V\\\\E=#=1_/%O"==J*,P  \\n'
198 | >>> mh = multihash.decode(mhash, 'uu')
199 | >>> print(mh)
200 | Multihash(sha1, b64:C+7Hteo/D9vJXQ3UfzxbwnXaijM=)
201 | >>> mh.encode('uu') == mhash
202 | True
203 | 
204 | You may remove any codec from the registry as well:
205 | 
206 | >>> multihash.CodecReg.unregister('uu')
207 | 
208 | `CodecReg` also allows you to iterate over registered codec names, and check
209 | if it contains a given codec (i.e. whether it is registered or not).
210 | 
211 | >>> {'hex', 'base64'}.issubset(multihash.CodecReg)
212 | True
213 | >>> 'base32' in multihash.CodecReg
214 | True
215 | """
216 | 
217 | from multihash.version import __version__  # noqa
218 | from multihash.funcs import Func, FuncReg  # noqa
219 | from multihash.codecs import CodecReg  # noqa
220 | from multihash.multihash import Multihash, digest, decode  # noqa
221 | 
# Public API of the package: the names re-exported by the imports above.
__all__ = [
    '__version__',
    'Func', 'FuncReg',
    'CodecReg',
    'Multihash', 'digest', 'decode',
]
228 | 


--------------------------------------------------------------------------------
/visualization/multihash/codecs.py:
--------------------------------------------------------------------------------
  1 | # pymultihash: Python implementation of the multihash specification
  2 | #
  3 | # Initial author: Ivan Vilata-i-Balaguer
  4 | # License: MIT
  5 | 
  6 | """Codec registry"""
  7 | 
  8 | from collections import namedtuple
  9 | 
 10 | # Import codecs mentioned in the multihash spec.
 11 | import binascii
 12 | import base64
 13 | 
 14 | # Try to import external codecs mentioned in the multihash spec.
 15 | try:
 16 |     import base58
 17 | except ImportError:
 18 |     base58 = None
 19 | 
 20 | 
 21 | class _CodecRegMeta(type):
 22 |     def __contains__(self, encoding):
 23 |         """Return whether `encoding` is a registered codec.
 24 | 
 25 |         >>> CodecReg.reset()
 26 |         >>> 'base64' in CodecReg
 27 |         True
 28 |         """
 29 |         return encoding in self._codecs
 30 | 
 31 |     def __iter__(self):
 32 |         """Iterate over registered codec names.
 33 | 
 34 |         >>> CodecReg.reset()
 35 |         >>> {'hex', 'base32', 'base64'}.issubset(CodecReg)
 36 |         True
 37 |         """
 38 |         return iter(self._codecs)
 39 | 
 40 | 
 41 | class CodecReg(metaclass=_CodecRegMeta):
 42 |     """Registry of supported codecs."""
 43 | 
 44 |     # Common codec data.
 45 |     _common_codec_data = [  # (name, encode, decode)
 46 |         ('hex', binascii.b2a_hex, binascii.a2b_hex),
 47 |         ('base32', base64.b32encode, base64.b32decode),
 48 |         ('base64', base64.b64encode, base64.b64decode)]
 49 |     if base58:
 50 |         _common_codec_data.append(
 51 |             ('base58', lambda s: bytes(base58.b58encode(s), 'ascii'), base58.b58decode))
 52 | 
 53 |     # Codec data: encoding and decoding functions (both from bytes to bytes).
 54 |     _codec = namedtuple('codec', 'encode decode')
 55 | 
 56 |     @classmethod
 57 |     def reset(cls):
 58 |         """Reset the registry to the standard codecs."""
 59 |         cls._codecs = {}
 60 |         c = cls._codec
 61 |         for (name, encode, decode) in cls._common_codec_data:
 62 |             cls._codecs[name] = c(encode, decode)
 63 | 
 64 |     @classmethod
 65 |     def register(cls, name, encode, decode):
 66 |         """Add a codec to the registry.
 67 | 
 68 |         Registers a codec with the given `name` (a string) to be used with the
 69 |         given `encode` and `decode` functions, which take a `bytes` object and
 70 |         return another one.  An existing codec is replaced.
 71 | 
 72 |         >>> import binascii
 73 |         >>> CodecReg.register('uu', binascii.b2a_uu, binascii.a2b_uu)
 74 |         >>> CodecReg.get_decoder('uu') is binascii.a2b_uu
 75 |         True
 76 |         >>> CodecReg.reset()
 77 |         >>> 'uu' in CodecReg
 78 |         False
 79 |         """
 80 |         cls._codecs[name] = cls._codec(encode, decode)
 81 | 
 82 |     @classmethod
 83 |     def unregister(cls, name):
 84 |         """Remove a codec from the registry.
 85 | 
 86 |         Unregisters the codec with the given `name` (a string).  If the codec
 87 |         is not registered, a `KeyError` is raised.
 88 | 
 89 |         >>> import binascii
 90 |         >>> CodecReg.register('uu', binascii.b2a_uu, binascii.a2b_uu)
 91 |         >>> 'uu' in CodecReg
 92 |         True
 93 |         >>> CodecReg.unregister('uu')
 94 |         >>> 'uu' in CodecReg
 95 |         False
 96 |         """
 97 |         del cls._codecs[name]
 98 | 
 99 |     @classmethod
100 |     def get_encoder(cls, encoding):
101 |         r"""Return an encoder for the given `encoding`.
102 | 
103 |         The encoder gets a `bytes` object as argument and returns another
104 |         encoded `bytes` object.  If the `encoding` is not registered, a
105 |         `KeyError` is raised.
106 | 
107 |         >>> encode = CodecReg.get_encoder('hex')
108 |         >>> encode(b'FOO\x00')
109 |         b'464f4f00'
110 |         """
111 |         return cls._codecs[encoding].encode
112 | 
113 |     @classmethod
114 |     def get_decoder(cls, encoding):
115 |         r"""Return a decoder for the given `encoding`.
116 | 
117 |         The decoder gets a `bytes` object as argument and returns another
118 |         decoded `bytes` object.  If the `encoding` is not registered, a
119 |         `KeyError` is raised.
120 | 
121 |         >>> decode = CodecReg.get_decoder('hex')
122 |         >>> decode(b'464f4f00')
123 |         b'FOO\x00'
124 |         """
125 |         return cls._codecs[encoding].decode
126 | 
# Initialize the codec registry at import time, so that codecs can be looked
# up without callers having to invoke `CodecReg.reset()` themselves.
CodecReg.reset()
129 | 


--------------------------------------------------------------------------------
/visualization/multihash/funcs.py:
--------------------------------------------------------------------------------
  1 | # pymultihash: Python implementation of the multihash specification
  2 | #
  3 | # Initial author: Ivan Vilata-i-Balaguer
  4 | # License: MIT
  5 | 
  6 | """Enumeration of standard multihash functions, and function registry"""
  7 | 
  8 | from collections import namedtuple
  9 | from enum import Enum
 10 | from numbers import Integral
 11 | 
 12 | # Import standard hashlib-compatible modules.
 13 | import hashlib
 14 | 
 15 | # Try to import known optional hashlib-compatible modules.
 16 | try:
 17 |     import sha3
 18 | except ImportError:
 19 |     sha3 = None
 20 | try:
 21 |     import pyblake2 as blake2
 22 | except ImportError:
 23 |     blake2 = None
 24 | 
 25 | 
 26 | def _is_app_specific_func(code):
 27 |     """Is the given hash function integer `code` application-specific?"""
 28 |     return isinstance(code, Integral) and (0x00 <= code <= 0x0f)
 29 | 
 30 | 
class Func(Enum):
    """An enumeration of hash functions supported by multihash.

    The name of each member has its hyphens replaced by underscores.
    The value of each member corresponds to its integer code.

    >>> Func.sha2_512.value == 0x13
    True

    `sha3` is assigned the same value as `sha3_512`, so `Enum` exposes it as
    an alias of that member rather than as a separate member.

    >>> Func.sha3 is Func.sha3_512
    True
    """
    # SHA-1 and SHA-2 family.
    sha1 = 0x11
    sha2_256 = 0x12
    sha2_512 = 0x13
    # See jbenet/multihash#11 for new SHA-3 function names and codes.
    sha3_512 = 0x14
    sha3 = sha3_512  # deprecated, for backwards compatibility
    sha3_384 = 0x15
    sha3_256 = 0x16
    sha3_224 = 0x17
    # SHAKE functions.
    shake_128 = 0x18
    shake_256 = 0x19
    # BLAKE2 family.
    blake2b = 0x40
    blake2s = 0x41
 53 | 
 54 | 
 55 | class _FuncRegMeta(type):
 56 |     def __contains__(self, func):
 57 |         """Return whether `func` is a registered function.
 58 | 
 59 |         >>> FuncReg.reset()
 60 |         >>> Func.sha2_256 in FuncReg
 61 |         True
 62 |         """
 63 |         return func in self._func_hash
 64 | 
 65 |     def __iter__(self):
 66 |         """Iterate over registered functions.
 67 | 
 68 |         Standard multihash functions are represented as members of `Func`,
 69 |         while application-specific functions are integers.
 70 | 
 71 |         >>> FuncReg.reset()
 72 |         >>> set(FuncReg) == set(Func)
 73 |         True
 74 |         """
 75 |         return iter(self._func_hash)
 76 | 
 77 | 
 78 | class FuncReg(metaclass=_FuncRegMeta):
 79 |     """Registry of supported hash functions."""
 80 | 
 81 |     # Standard hash function data.
 82 |     _std_func_data = [  # (func, hash name, hash new)
 83 |         (Func.sha1, 'sha1', hashlib.sha1),
 84 | 
 85 |         (Func.sha2_256, 'sha256', hashlib.sha256),
 86 |         (Func.sha2_512, 'sha512', hashlib.sha512),
 87 | 
 88 |         (Func.sha3_512, 'sha3_512', sha3.sha3_512 if sha3 else None),
 89 |         (Func.sha3_384, 'sha3_384', sha3.sha3_384 if sha3 else None),
 90 |         (Func.sha3_256, 'sha3_256', sha3.sha3_256 if sha3 else None),
 91 |         (Func.sha3_224, 'sha3_224', sha3.sha3_224 if sha3 else None),
 92 | 
 93 |         (Func.shake_128, 'shake_128', None),
 94 |         (Func.shake_256, 'shake_256', None),
 95 | 
 96 |         (Func.blake2b, 'blake2b', blake2.blake2b if blake2 else None),
 97 |         (Func.blake2s, 'blake2s', blake2.blake2s if blake2 else None)]
 98 | 
 99 |     # Hashlib compatibility data for a hash: hash name (e.g. ``sha256`` for
100 |     # SHA-256, ``sha2-256`` in multihash), and the corresponding constructor.
101 |     _hash = namedtuple('hash', 'name new')
102 | 
103 |     @classmethod
104 |     def reset(cls):
105 |         """Reset the registry to the standard multihash functions."""
106 |         # Maps function names (hyphens or underscores) to registered functions.
107 |         cls._func_from_name = {}
108 | 
109 |         # Maps hashlib names to registered functions.
110 |         cls._func_from_hash = {}
111 | 
112 |         # Hashlib compatibility data by function.
113 |         cls._func_hash = {}
114 | 
115 |         register = cls._do_register
116 |         for (func, hash_name, hash_new) in cls._std_func_data:
117 |             register(func, func.name, hash_name, hash_new)
118 |         assert set(cls._func_hash) == set(Func)
119 | 
120 |     @classmethod
121 |     def get(cls, func_hint):
122 |         """Return a registered hash function matching the given hint.
123 | 
124 |         The hint may be a `Func` member, a function name (with hyphens or
125 |         underscores), or its code.  A `Func` member is returned for standard
126 |         multihash functions and an integer code for application-specific ones.
127 |         If no matching function is registered, a `KeyError` is raised.
128 | 
129 |         >>> fm = FuncReg.get(Func.sha2_256)
130 |         >>> fnu = FuncReg.get('sha2_256')
131 |         >>> fnh = FuncReg.get('sha2-256')
132 |         >>> fc = FuncReg.get(0x12)
133 |         >>> fm == fnu == fnh == fc
134 |         True
135 |         """
136 |         # Different possibilities of `func_hint`, most to least probable.
137 |         try:  # `Func` member (or its value)
138 |             return Func(func_hint)
139 |         except ValueError:
140 |             pass
141 |         if func_hint in cls._func_from_name:  # `Func` member name, extended
142 |             return cls._func_from_name[func_hint]
143 |         if func_hint in cls._func_hash:  # registered app-specific code
144 |             return func_hint
145 |         raise KeyError("unknown hash function", func_hint)
146 | 
147 |     @classmethod
148 |     def _do_register(cls, code, name, hash_name=None, hash_new=None):
149 |         """Add hash function data to the registry without checks."""
150 |         cls._func_from_name[name.replace('-', '_')] = code
151 |         cls._func_from_name[name.replace('_', '-')] = code
152 |         if hash_name:
153 |             cls._func_from_hash[hash_name] = code
154 |         cls._func_hash[code] = cls._hash(hash_name, hash_new)
155 | 
156 |     @classmethod
157 |     def register(cls, code, name, hash_name=None, hash_new=None):
158 |         """Add an application-specific function to the registry.
159 | 
160 |         Registers a function with the given `code` (an integer) and `name` (a
161 |         string, which is added both with only hyphens and only underscores),
162 |         as well as an optional `hash_name` and `hash_new` constructor for
163 |         hashlib compatibility.  If the application-specific function is
164 |         already registered, the related data is replaced.  Registering a
165 |         function with a `code` not in the application-specific range
166 |         (0x00-0xff) or with names already registered for a different function
167 |         raises a `ValueError`.
168 | 
169 |         >>> import hashlib
170 |         >>> FuncReg.register(0x05, 'md-5', 'md5', hashlib.md5)
171 |         >>> FuncReg.get('md-5') == FuncReg.get('md_5') == 0x05
172 |         True
173 |         >>> hashobj = FuncReg.hash_from_func(0x05)
174 |         >>> hashobj.name == 'md5'
175 |         True
176 |         >>> FuncReg.func_from_hash(hashobj) == 0x05
177 |         True
178 |         >>> FuncReg.reset()
179 |         >>> 0x05 in FuncReg
180 |         False
181 |         """
182 |         if not _is_app_specific_func(code):
183 |             raise ValueError(
184 |                 "only application-specific functions can be registered")
185 |         # Check already registered name in different mappings.
186 |         name_mapping_data = [  # (mapping, name in mapping, error if existing)
187 |             (cls._func_from_name, name,
188 |              "function name is already registered for a different function"),
189 |             (cls._func_from_hash, hash_name,
190 |              "hashlib name is already registered for a different function")]
191 |         for (mapping, nameinmap, errmsg) in name_mapping_data:
192 |             existing_func = mapping.get(nameinmap, code)
193 |             if existing_func != code:
194 |                 raise ValueError(errmsg, existing_func)
195 |         # Unregister if existing to ensure no orphan entries.
196 |         if code in cls._func_hash:
197 |             cls.unregister(code)
198 |         # Proceed to registration.
199 |         cls._do_register(code, name, hash_name, hash_new)
200 | 
201 |     @classmethod
202 |     def unregister(cls, code):
203 |         """Remove an application-specific function from the registry.
204 | 
205 |         Unregisters the function with the given `code` (an integer).  If the
206 |         function is not registered, a `KeyError` is raised.  Unregistering a
207 |         function with a `code` not in the application-specific range
208 |         (0x00-0xff) raises a `ValueError`.
209 | 
210 |         >>> import hashlib
211 |         >>> FuncReg.register(0x05, 'md-5', 'md5', hashlib.md5)
212 |         >>> FuncReg.get('md-5')
213 |         5
214 |         >>> FuncReg.unregister(0x05)
215 |         >>> FuncReg.get('md-5')
216 |         Traceback (most recent call last):
217 |             ...
218 |         KeyError: ('unknown hash function', 'md-5')
219 |         """
220 |         if code in Func:
221 |             raise ValueError(
222 |                 "only application-specific functions can be unregistered")
223 |         # Remove mapping to function by name.
224 |         func_names = {n for (n, f) in cls._func_from_name.items() if f == code}
225 |         for func_name in func_names:
226 |             del cls._func_from_name[func_name]
227 |         # Remove hashlib data and mapping to hash.
228 |         hash = cls._func_hash.pop(code)
229 |         if hash.name:
230 |             del cls._func_from_hash[hash.name]
231 | 
232 |     @classmethod
233 |     def func_from_hash(cls, hash):
234 |         """Return the multihash `Func` for the hashlib-compatible `hash` object.
235 | 
236 |         If no `Func` is registered for the given hash, a `KeyError` is raised.
237 | 
238 |         >>> import hashlib
239 |         >>> h = hashlib.sha256()
240 |         >>> f = FuncReg.func_from_hash(h)
241 |         >>> f is Func.sha2_256
242 |         True
243 |         """
244 |         return cls._func_from_hash[hash.name]
245 | 
246 |     @classmethod
247 |     def hash_from_func(cls, func):
248 |         """Return a hashlib-compatible object for the multihash `func`.
249 | 
250 |         If the `func` is registered but no hashlib-compatible constructor is
251 |         available for it, `None` is returned.  If the `func` is not
252 |         registered, a `KeyError` is raised.
253 | 
254 |         >>> h = FuncReg.hash_from_func(Func.sha2_256)
255 |         >>> h.name
256 |         'sha256'
257 |         """
258 |         new = cls._func_hash[func].new
259 |         return new() if new else None
260 | 
# Initialize the function hash registry at import time, so that the standard
# multihash functions are registered before any lookup takes place.
FuncReg.reset()
263 | 


--------------------------------------------------------------------------------
/visualization/multihash/multihash.py:
--------------------------------------------------------------------------------
  1 | # pymultihash: Python implementation of the multihash specification
  2 | #
  3 | # Initial author: Ivan Vilata-i-Balaguer
  4 | # License: MIT
  5 | 
  6 | """Multihash class and utility functions"""
  7 | 
  8 | from collections import namedtuple
  9 | 
 10 | import base64
 11 | 
 12 | from multihash.funcs import _is_app_specific_func, Func, FuncReg
 13 | from multihash.codecs import CodecReg
 14 | 
 15 | 
 16 | def _do_digest(data, func):
 17 |     """Return the binary digest of `data` with the given `func`."""
 18 |     func = FuncReg.get(func)
 19 |     hash = FuncReg.hash_from_func(func)
 20 |     if not hash:
 21 |         raise ValueError("no available hash function for hash", func)
 22 |     hash.update(data)
 23 |     return bytes(hash.digest())
 24 | 
 25 | 
 26 | class Multihash(namedtuple('Multihash', 'func digest')):
 27 |     """A named tuple representing a multihash function and digest.
 28 | 
 29 |     The hash function is usually a `Func` member.
 30 | 
 31 |     >>> mh = Multihash(Func.sha1, b'BINARY_DIGEST')
 32 |     >>> mh == (Func.sha1, b'BINARY_DIGEST')
 33 |     True
 34 |     >>> mh == (mh.func, mh.digest)
 35 |     True
 36 | 
 37 |     However it can also be its integer value (the function code) or its string
 38 |     name (the function name, with either underscore or hyphen).
 39 | 
 40 |     >>> mhfc = Multihash(Func.sha1.value, mh.digest)
 41 |     >>> mhfc == mh
 42 |     True
 43 |     >>> mhfn = Multihash('sha2-256', b'...')
 44 |     >>> mhfn.func is Func.sha2_256
 45 |     True
 46 | 
 47 |     Application-specific codes (0x00-0x0f) are also accepted.  Other codes
 48 |     raise a `KeyError`.
 49 | 
 50 |     >>> mhfc = Multihash(0x01, b'...')
 51 |     >>> mhfc.func
 52 |     1
 53 |     >>> mhfc = Multihash(1234, b'...')
 54 |     Traceback (most recent call last):
 55 |         ...
 56 |     KeyError: ('unknown hash function', 1234)
 57 |     """
 58 |     __slots__ = ()
 59 | 
 60 |     def __new__(cls, func, digest):
 61 |         try:
 62 |             func = FuncReg.get(func)
 63 |         except KeyError:
 64 |             if _is_app_specific_func(func):
 65 |                 # Application-specific function codes
 66 |                 # are allowed even if not registered.
 67 |                 func = int(func)
 68 |             else:
 69 |                 raise
 70 |         digest = bytes(digest)
 71 |         return super(cls, Multihash).__new__(cls, func, digest)
 72 | 
 73 |     @classmethod
 74 |     def from_hash(self, hash):
 75 |         """Create a `Multihash` from a hashlib-compatible `hash` object.
 76 | 
 77 |         >>> import hashlib
 78 |         >>> data = b'foo'
 79 |         >>> hash = hashlib.sha1(data)
 80 |         >>> digest = hash.digest()
 81 |         >>> mh = Multihash.from_hash(hash)
 82 |         >>> mh == (Func.sha1, digest)
 83 |         True
 84 | 
 85 |         Application-specific hash functions are also supported (see
 86 |         `FuncReg`).
 87 | 
 88 |         If there is no matching multihash hash function for the given `hash`,
 89 |         a `ValueError` is raised.
 90 |         """
 91 |         try:
 92 |             func = FuncReg.func_from_hash(hash)
 93 |         except KeyError as ke:
 94 |             raise ValueError(
 95 |                 "no matching multihash function", hash.name) from ke
 96 |         digest = hash.digest()
 97 |         return Multihash(func, digest)
 98 | 
 99 |     def __str__(self):
100 |         """Return a compact string representation of the multihash.
101 | 
102 |         The representation includes the name of the standard multihash
103 |         function or the hexadecimal code of the application-specific one, and
104 |         a Base64-encoded version of the raw digest.  This is *not* the
105 |         complete multihash-encoded digest that can be obtained with
106 |         `Multihash.encode()`.
107 | 
108 |         >>> mh = Multihash(Func.sha1, b'TEST')
109 |         >>> print(mh)
110 |         Multihash(sha1, b64:VEVTVA==)
111 |         >>> mh = Multihash(0x01, b'TEST')
112 |         >>> print(mh)
113 |         Multihash(0x1, b64:VEVTVA==)
114 |         """
115 |         return 'Multihash({func}, b64:{digest})'.format(
116 |             func=self.func.name if self.func in Func else hex(self.func),
117 |             digest=base64.b64encode(self.digest).decode()
118 |         )
119 | 
120 |     def encode(self, encoding=None):
121 |         r"""Encode into a multihash-encoded digest.
122 | 
123 |         If `encoding` is `None`, a binary digest is produced:
124 | 
125 |         >>> mh = Multihash(0x01, b'TEST')
126 |         >>> mh.encode()
127 |         b'\x01\x04TEST'
128 | 
129 |         If the name of an `encoding` is specified, it is used to encode the
130 |         binary digest before returning it (see `CodecReg` for supported
131 |         codecs).
132 | 
133 |         >>> mh.encode('base64')
134 |         b'AQRURVNU'
135 | 
136 |         If the `encoding` is not available, a `KeyError` is raised.
137 |         """
138 |         try:
139 |             fc = self.func.value
140 |         except AttributeError:  # application-specific function code
141 |             fc = self.func
142 |         mhash = bytes([fc, len(self.digest)]) + self.digest
143 |         if encoding:
144 |             mhash = CodecReg.get_encoder(encoding)(mhash)
145 |         return mhash
146 | 
147 |     def verify(self, data):
148 |         r"""Does the given `data` hash to the digest in this `Multihash`?
149 | 
150 |         >>> import hashlib
151 |         >>> data = b'foo'
152 |         >>> hash = hashlib.sha1(data)
153 |         >>> mh = Multihash.from_hash(hash)
154 |         >>> mh.verify(data)
155 |         True
156 |         >>> mh.verify(b'foobar')
157 |         False
158 | 
159 |         Application-specific hash functions are also supported (see
160 |         `FuncReg`).
161 |         """
162 |         digest = _do_digest(data, self.func)
163 |         return digest[:len(self.digest)] == self.digest
164 | 
165 |     def truncate(self, length):
166 |         """Return a new `Multihash` with a shorter digest `length`.
167 | 
168 |         If the given `length` is greater than the original, a `ValueError`
169 |         is raised.
170 | 
171 |         >>> mh1 = Multihash(0x01, b'FOOBAR')
172 |         >>> mh2 = mh1.truncate(3)
173 |         >>> mh2 == (0x01, b'FOO')
174 |         True
175 |         >>> mh3 = mh1.truncate(10)
176 |         Traceback (most recent call last):
177 |             ...
178 |         ValueError: cannot enlarge the original digest by 4 bytes
179 |         """
180 |         if length > len(self.digest):
181 |             raise ValueError("cannot enlarge the original digest by %d bytes"
182 |                              % (length - len(self.digest)))
183 |         return self.__class__(self.func, self.digest[:length])
184 | 
185 | 
def digest(data, func):
    """Compute the hash of `data` and wrap it in a new `Multihash`.

    Hashing is done with the hash function `func`, which has to be
    registered (see `FuncReg`).

    >>> data = b'foo'
    >>> mh = digest(data, Func.sha1)
    >>> mh.encode('base64')
    b'ERQL7se16j8P28ldDdR/PFvCddqKMw=='
    """
    return Multihash(func, _do_digest(data, func))
199 | 
200 | 
def decode(mhash, encoding=None):
    r"""Parse a multihash-encoded digest and return it as a `Multihash`.

    When `encoding` is `None`, `mhash` is taken to be a binary digest.

    >>> mhash = b'\x11\x0a\x0b\xee\xc7\xb5\xea?\x0f\xdb\xc9]'
    >>> mh = decode(mhash)
    >>> mh == (Func.sha1, mhash[2:])
    True

    When `encoding` names a codec, that codec decodes the digest before it is
    parsed (see `CodecReg` for supported codecs).

    >>> import base64
    >>> emhash = base64.b64encode(mhash)
    >>> emh = decode(emhash, 'base64')
    >>> emh == mh
    True

    If the `encoding` is not available, a `KeyError` is raised.  If the digest
    has an invalid format or contains invalid data, a `ValueError` is raised.
    """
    raw = bytes(mhash)
    if encoding:
        decoder = CodecReg.get_decoder(encoding)
        raw = decoder(raw)
    # The first two bytes carry the function code and the digest length.
    try:
        func, length = raw[0], raw[1]
    except IndexError as ie:
        raise ValueError("multihash is too short") from ie
    payload = raw[2:]
    if len(payload) != length:
        raise ValueError(
            "multihash length field does not match digest field length")
    return Multihash(func, payload)
236 | 


--------------------------------------------------------------------------------
/visualization/multihash/version.py:
--------------------------------------------------------------------------------
# Version string of the bundled multihash package.
__version__ = '0.9.0.dev1'
2 | 


--------------------------------------------------------------------------------
/visualization/plot:
--------------------------------------------------------------------------------
#!/usr/bin/env bash
# Render the connection timeline of an IPFS DHT query log.
#
# Usage: ./plot [QUERY_LOG]
#
# Parses QUERY_LOG (standard input when no argument is given) with
# log2plot.py, writes the result to ./DATA, and then runs gnuplot on
# graph.gpi, which expects to find that DATA file.

# Stop at the first failing step so gnuplot never plots a stale or partial
# DATA file after a parsing error.
set -euo pipefail

# Quote the path so names containing spaces or glob characters reach cat
# intact; "-" keeps the fallback of reading standard input when no argument
# is given.
cat -- "${1:--}" | ./log2plot.py > DATA
gnuplot graph.gpi
5 | 


--------------------------------------------------------------------------------
/visualization/shrink.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | 
 3 | import os
 4 | import sys
 5 | import hashlib
 6 | import base58
 7 | 
 8 | def get_hex(b58_encoded_peer_id_str: str) -> str:
 9 |   """Converts base-58 multihash to hex representation"""
10 |   bytes = base58.b58decode(b58_encoded_peer_id_str)
11 |   sha256 = hashlib.sha256(bytes).digest()
12 |   return sha256.hex()[:6]
13 | 
def shorten(str):
  """Replace a peer-ID-looking word with its short `<hex>` tag.

  Words starting with 'Qm' or '12D3Koo' are treated as peer IDs: one trailing
  colon, if present, is dropped and the rest is passed to `get_hex()`.  Any
  other word is returned unchanged.
  """
  if not str.startswith(('Qm', '12D3Koo')):
    return str
  peer_id = str[:-1] if str.endswith(':') else str
  return '<%s>' % (get_hex(peer_id))
21 | 
22 | 
# Filter standard input line by line, shortening every peer ID found.
for raw_line in sys.stdin:
  shortened = [shorten(token) for token in raw_line.split()]
  # Every word, the last one included, is followed by a single space; the
  # output is flushed per line so the filter can be used on a live stream.
  print(''.join('%s ' % token for token in shortened), flush=True)
27 | 


--------------------------------------------------------------------------------