├── protod ├── __init__.py ├── definition.py ├── util.py ├── field.py ├── main.py ├── decode.py └── renderer.py ├── setup.py ├── .gitignore ├── LICENSE ├── README.md └── example ├── html_renderer.py ├── json_renderer.py └── mitmproxy_proto_view.py /protod/__init__.py: -------------------------------------------------------------------------------- 1 | __all__ = ["decode"] 2 | from .decode import dump 3 | from .renderer import ConsoleRenderer, Renderer 4 | -------------------------------------------------------------------------------- /protod/definition.py: -------------------------------------------------------------------------------- 1 | class WireType: 2 | Varint = 0 3 | Fixed64 = 1 4 | Struct = 2 5 | Deprecated_3 = 3 6 | Deprecated_4 = 4 7 | Fixed32 = 5 8 | 9 | 10 | def wire_type_str(t): 11 | if t == WireType.Varint: 12 | return "varint" 13 | elif t == WireType.Fixed64: 14 | return "fixed64/double" 15 | elif t == WireType.Struct: 16 | return "string" 17 | elif t == WireType.Fixed32: 18 | return "fixed32/float" 19 | else: 20 | return "Unknown wire type" 21 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import find_packages, setup 2 | 3 | github = "https://github.com/aj3423/protod/" 4 | 5 | setup( 6 | name="protod", 7 | version="24.4.10", 8 | description="Decode protobuf without message definition.", 9 | url=github, 10 | author="aj3423", 11 | packages=find_packages(), 12 | install_requires=["chardet", "charset_normalizer", "protobuf", "termcolor"], 13 | entry_points={"console_scripts": ["protod=protod.main:dummy"]}, 14 | long_description="See: " + github, 15 | ) 16 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | 5 | # C extensions 6 | *.so 7 | 8 | # Distribution / packaging 9 | bin/ 10 | build/ 11 | develop-eggs/ 12 | dist/ 13 | eggs/ 14 | lib/ 15 | lib64/ 16 | parts/ 17 | sdist/ 18 | var/ 19 | *.egg-info/ 20 | .installed.cfg 21 | *.egg 22 | 23 | # Installer logs 24 | pip-log.txt 25 | pip-delete-this-directory.txt 26 | 27 | # Unit test / coverage reports 28 | .tox/ 29 | .coverage 30 | .cache 31 | nosetests.xml 32 | coverage.xml 33 | 34 | # Translations 35 | *.mo 36 | 37 | # Mr Developer 38 | .mr.developer.cfg 39 | .project 40 | .pydevproject 41 | 42 | # Rope 43 | .ropeproject 44 | 45 | # Django stuff: 46 | *.log 47 | *.pot 48 | 49 | # Sphinx documentation 50 | docs/_build/ 51 | .vscode/ 52 | sample.proto 53 | -------------------------------------------------------------------------------- /protod/util.py: -------------------------------------------------------------------------------- 1 | import chardet 2 | import charset_normalizer 3 | 4 | 5 | # try to detect the encoding of an string 6 | # return ( 7 | # decoded bytes: bytes 8 | # encoding name: str 9 | # decoding succeeded: bool 10 | # ) 11 | def detect_multi_charset(view) -> tuple[bytes, str, bool]: 12 | view_bytes = view.tobytes() 13 | try: 14 | # `chardet` is way more accurate, but very slow with large bytes(4 seconds on 50k bytes) 15 | # `charset_normalizer` shows wrong result with small bytes, but very performant with long bytes 16 | if len(view_bytes) <= 200: 17 | detected = chardet.detect(view_bytes) 18 | else: 19 | detected = charset_normalizer.detect(view_bytes) 20 | 21 | if detected["confidence"] < 0.9: 22 | raise Exception() 23 | 24 | encoding = detected["encoding"] 25 | 26 | decoded = view_bytes.decode(encoding) 27 | 28 | return decoded, encoding, True 29 | except: 30 | pass 31 | 32 | return view_bytes, "", False 33 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 aj3423 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## Decode protobuf without proto. 2 | ## Try it online 3 | http://168.138.55.177/ 4 | # Screenshot 5 |  6 | ## Install 7 | `pip install protod` 8 | ## The command line tool 9 | 10 | - `protod 080102...` 11 | - `protod '08 01 02...'` (with space/tab/newline) 12 | - `protod --b64 CAEIAQ==` 13 | - `protod --file ~/pb.bin` 14 | - `protod` for help 15 | 16 | ## library protod 17 | It uses different `Renderer` to generate different output: 18 | - For console: 19 | ```python 20 | print(protod.dump(proto_bytes)) # ConsoleRenderer is used by default 21 | ``` 22 | 23 | There are [examples](https://github.com/aj3423/protod/blob/master/example) demonstrate how to write custom `Renderer`s: 24 | - json 25 | 26 |  27 | 28 | - html 29 | 30 |  31 | 32 | - Mitmproxy addon: 33 | 34 |  35 | 36 | -------------------------------------------------------------------------------- /example/html_renderer.py: -------------------------------------------------------------------------------- 1 | from protod import ConsoleRenderer 2 | 3 | 4 | # The HtmlRenderer builds a full html div string, 5 | # which can be simply set to a
len(view): 75 | raise Exception("not enough data for wire type 2(string)") 76 | 77 | view_field = view[pos : pos + s_len] 78 | pos += s_len 79 | 80 | as_str, encoding, is_str = str_decoder(view_field) 81 | ret = Struct(view_field, as_str, encoding, is_str) 82 | 83 | try: 84 | # if decode successfully, it's child struct, not just binary bytes 85 | ret.as_fields = decode_all_fields(str_decoder, ret, view_field) 86 | except: 87 | pass 88 | 89 | elif wire_type == WireType.Deprecated_3: # 3 90 | raise Exception("[proto 3] found, looks like invalid proto bytes") 91 | 92 | elif wire_type == WireType.Deprecated_4: # 4 93 | raise Exception("[proto 4] found, looks like invalid proto bytes") 94 | else: 95 | raise Exception(f"Unknown wire type {wire_type} of id_type {id_type}") 96 | 97 | ret.idtype = IdType(id, wire_type, idtype_bytes) 98 | ret.parent = parent 99 | 100 | return ret, pos 101 | 102 | 103 | def decode_all_fields(str_decoder, parent: Field, view: memoryview) -> List[Field]: 104 | pos = 0 105 | fields = [] 106 | 107 | while pos < len(view): 108 | try: 109 | field, field_len = decode_1_field(str_decoder, parent, view[pos:]) 110 | except: 111 | raise Exception(f"field: {view[pos:].tobytes()}") 112 | 113 | fields.append(field) 114 | pos += field_len 115 | 116 | # group fields with same id to a RepeatedField 117 | ret = [] 118 | for _, group in itertools.groupby(fields, lambda f: f.idtype.id): 119 | 120 | items = list(group) 121 | 122 | if len(items) == 1: # single field 123 | ret.append(items[0]) 124 | else: # repeated fields 125 | repeated = RepeatedField(items) 126 | repeated.idtype = items[0].idtype 127 | repeated.parent = items[0].parent 128 | ret.append(repeated) 129 | 130 | return ret 131 | 132 | 133 | def dump( 134 | data: bytes, 135 | renderer=None, 136 | str_decoder=detect_multi_charset, 137 | ): 138 | if renderer == None: 139 | renderer = ConsoleRenderer() 140 | 141 | view = memoryview(data) 142 | 143 | fields = decode_all_fields(str_decoder=str_decoder, parent=None, view=view) 144 | 145 | for ch in fields: 146 | ch.render(renderer) 147 | 148 | return renderer.build_result() 149 | -------------------------------------------------------------------------------- /protod/renderer.py: -------------------------------------------------------------------------------- 1 | from abc import ABC, abstractmethod 2 | from html import escape 3 | 4 | from termcolor import colored 5 | 6 | from .definition import wire_type_str 7 | 8 | 9 | # Field formatter and colorizer 10 | class Renderer(ABC): 11 | # Return the final result, which can be in different formats, eg: 12 | # for console, it's a string with ansi color 13 | # for mitmproxy, it's an array of tuple 14 | @abstractmethod 15 | def build_result(self): 16 | pass 17 | 18 | # render repeated fields 19 | @abstractmethod 20 | def render_repeated_fields(self, repeated): 21 | pass 22 | 23 | # render varint 24 | @abstractmethod 25 | def render_varint(self, varint): 26 | pass 27 | 28 | # render fixed32/fixed64 29 | @abstractmethod 30 | def render_fixed(self, fixed): 31 | pass 32 | 33 | # render struct 34 | @abstractmethod 35 | def render_struct(self, struct): 36 | pass 37 | 38 | 39 | class ConsoleRenderer(Renderer): 40 | # Long binary data that exceeds `n` bytes is truncated and followed by a '...' 41 | # use a large value like 1000000 to 'not' truncate 42 | # default: 32 43 | def __init__(self, truncate_after=32, no_color=False): 44 | self.cells = [] 45 | self.truncate_after = truncate_after 46 | self.no_color = no_color 47 | 48 | def build_result(self): 49 | return "".join(self.cells) 50 | 51 | def render_repeated_fields(self, repeated): 52 | for ch in repeated.items: 53 | ch.render(self) 54 | 55 | def render_varint(self, varint): 56 | self._render_idtype(varint.indent_level(), varint.idtype) 57 | 58 | self._add_num(str(varint.i64)) 59 | self._add_normal(" (") 60 | self._add_num(str(hex(varint.u64))) 61 | self._add_normal(")") 62 | 63 | self._add_newline() 64 | 65 | def render_fixed(self, fixed): 66 | self._render_idtype(fixed.indent_level(), fixed.idtype) 67 | 68 | u, i, f = fixed.u, fixed.i, fixed.f 69 | 70 | self._add_normal(str(u)) # show unsigned form 71 | if i < 0: # also show signed value if it's negative 72 | self._add_normal(f" ({str(i)})") 73 | 74 | self._add_normal(f" ({hex(u)}) ({str(f)})") # show hex and float form 75 | self._add_newline() 76 | 77 | def render_struct(self, struct): 78 | self._render_idtype(struct.indent_level(), struct.idtype) 79 | 80 | self._add_normal(f"({str(len(struct.view))}) ") 81 | 82 | if struct.is_str: 83 | if struct.as_fields: 84 | if struct.as_str.isprintable(): 85 | self._render_str(struct.as_str, struct.encoding) 86 | self._add_newline() 87 | else: 88 | self._render_str(struct.as_str, struct.encoding) 89 | 90 | # Also show hex if: 91 | # 1. it contains non-printable characters 92 | # 2. it is short, less than 8 bytes 93 | if not struct.as_str.isprintable() or 0 < len(struct.view) <= 8: 94 | self._add_normal(" (") 95 | self._add_bin(struct.view) 96 | self._add_normal(")") 97 | self._add_newline() 98 | 99 | else: 100 | if not struct.as_fields: 101 | # show as binary 102 | self._add_bin(struct.view) 103 | self._add_newline() 104 | 105 | # 2. show as child struct 106 | if struct.as_fields: 107 | self._add_newline() 108 | for ch in struct.as_fields: 109 | ch.render(self) 110 | 111 | ########################### 112 | 113 | def _render_idtype(self, indent_level, idtype): 114 | self._add_indent(indent_level) 115 | self._add_normal("[") 116 | self._add_idtype(" ".join(format(x, "02x") for x in idtype.raw_bytes)) 117 | self._add_normal("] ") 118 | self._add_id(str(idtype.id) + " ") 119 | self._add_type(wire_type_str(idtype.wire_type)) 120 | self._add_normal(": ") 121 | 122 | def _add_newline(self): 123 | self._add("\n") 124 | 125 | def _render_str(self, string: str, encoding: str): 126 | if encoding not in ["ascii"]: 127 | self._add_normal(f"[{encoding}] ") 128 | self._add_str(string) 129 | 130 | def _add(self, cell): 131 | self.cells.append(cell) 132 | pass 133 | 134 | def _add_indent(self, level): 135 | self._add(" " * 4 * level) 136 | 137 | def _add_normal(self, s): 138 | self._add(s) 139 | 140 | def _add_idtype(self, s): 141 | self._add(colored(s, "light_red", no_color=self.no_color)) 142 | 143 | def _add_id(self, s): 144 | self._add(colored(s, "light_green", no_color=self.no_color)) 145 | 146 | def _add_type(self, s): 147 | self._add(colored(s, "yellow", no_color=self.no_color)) 148 | 149 | def _add_num(self, s): 150 | self._add(colored(s, "light_cyan", no_color=self.no_color)) 151 | 152 | def _add_str(self, s): 153 | self._add(colored(s, "light_blue", no_color=self.no_color)) 154 | 155 | def _add_bin(self, s): 156 | truncated = s[: self.truncate_after] 157 | self._add( 158 | colored( 159 | " ".join(format(x, "02x") for x in truncated), 160 | "light_yellow", 161 | no_color=self.no_color, 162 | ) 163 | ) 164 | if len(s) > self.truncate_after: 165 | self._add_normal(" ...") 166 | --------------------------------------------------------------------------------