├── .gitignore
├── .vscode
    └── tasks.json
├── doc
    ├── example-a.md
    ├── example-b.md
    ├── example-git-diff.png
    └── example-same-same.png
├── install-dev.sh
├── readme.md
├── same-same.py
└── setup.cfg


/.gitignore:
--------------------------------------------------------------------------------
1 | _build
2 | 


--------------------------------------------------------------------------------
/.vscode/tasks.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "version": "2.0.0",
 3 |   "echoCommand": false,
 4 |   "type": "process",
 5 |   "presentation": {},
 6 |   "tasks": [
 7 |     {
 8 |       "label": "typecheck-current",
 9 |       "command": "craft-py-check",
10 |       "args": ["${relativeFile}"],
11 |       "group": {"kind": "build", "isDefault": true},
12 |       "problemMatcher": "$craft"
13 |     },
14 |     {
15 |       "label": "typecheck",
16 |       "command": "craft-py-check",
17 |       "args": ["pithy"],
18 |       "group": "build",
19 |       "problemMatcher": "$craft"
20 |     },
21 |     {
22 |       "label": "test and typecheck",
23 |       "command": "make",
24 |       "args": [],
25 |       "group": {"kind": "test", "isDefault": true},
26 |       "problemMatcher": "$craft"
27 |     }
28 |   ]
29 | }
30 | 


--------------------------------------------------------------------------------
/doc/example-a.md:
--------------------------------------------------------------------------------
 1 | # Same-same Example
 2 | 
 3 | ## Token highlighting
 4 | 
 5 | This is the left side.
 6 | 
 7 | ## Movement
 8 | 
 9 | ### Section 2
10 | This section gets moved down.
11 | 
12 | ### Section 1
13 | This section stays put.
14 | 
15 | ## Invisible Characters
16 | 	tab indented.
17 | 


--------------------------------------------------------------------------------
/doc/example-b.md:
--------------------------------------------------------------------------------
 1 | # Same-same Example
 2 | 
 3 | ## Token highlighting
 4 | 
 5 | This is the right side.
 6 | 
 7 | ## Movement
 8 | 
 9 | ### Section 1
10 | This section stays put.
11 | 
12 | ### Section 2
13 | This section was moved down.
14 | 
15 | ## Invisible Characters
16 |   space indented.
17 | 


--------------------------------------------------------------------------------
/doc/example-git-diff.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gwk/same-same/065674668bf26dd2bcc62ab7a556629d21647fe4/doc/example-git-diff.png


--------------------------------------------------------------------------------
/doc/example-same-same.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gwk/same-same/065674668bf26dd2bcc62ab7a556629d21647fe4/doc/example-same-same.png


--------------------------------------------------------------------------------
/install-dev.sh:
--------------------------------------------------------------------------------
1 | set -e
2 | # Symlink the working copy onto PATH; $PWD is quoted so checkout paths containing spaces are not word-split.
3 | ln -s "$PWD/same-same.py" /usr/local/bin/same-same
4 | chmod +x /usr/local/bin/same-same
5 | 


--------------------------------------------------------------------------------
/readme.md:
--------------------------------------------------------------------------------
  1 | # Same-same: a Git diff highlighter
  2 | 
  3 | Same-same is a git diff highlighter like Git's [contrib/diff-highlight](https://github.com/git/git/tree/master/contrib/diff-highlight) and [diff-so-fancy](https://github.com/so-fancy/diff-so-fancy).
  4 | 
  5 | The highlighter accomplishes several things:
  6 | * Highlights add/remove lines using background colors.
  7 | * Tokenizes changed blocks and highlights per-token changes using text colors.
  8 | * Detects moved lines and highlights them with separate (yellowish) text colors.
  9 | * Distinguishes file and hunk metadata lines with background colors.
 10 | * Removes leading '+'/'-'/' ' characters from hunk lines, which makes copy/paste from the terminal more convenient.
 11 | * Reformats chunk headers to look like "dir/source.ext:line_num:", which allows editors such as VSCode to click through the source location.
 12 | * Rewrites invisible characters in the ASCII and Latin-1 Unicode blocks to display as escape codes, and highlights them using color inversion. Note that this includes the tab character, which is great if you use two-space code indentation like I do, but not so great for other settings. If someone wants to make this configurable for other tab widths, please get in touch!
 13 | 
 14 | An example diff: git's colored output on the left, and highlighted by Same-same on the right:
 15 | 
 16 | <img src="doc/example-git-diff.png" width="40%" /> <img src="doc/example-same-same.png" width="40%" />
 17 | 
 18 | 
 19 | # License
 20 | 
 21 | Same-same is dedicated to the public domain under CC0: https://creativecommons.org/publicdomain/zero/1.0/.
 22 | 
 23 | 
 24 | # Requirements
 25 | 
 26 | Same-same currently requires Python 3.x and an `xterm-256` compatible terminal, as it generates 256-color output.
 27 | 
 28 | Same-same has been tested only on macOS 10.13 and Apple Terminal.
 29 | 
 30 | 
 31 | # Installation
 32 | 
 33 | The program is a standalone Python script. To install, just copy it to some location on your shell's PATH, e.g.:
 34 | 
 35 |     .../same-same $ cp same-same.py /usr/local/bin/same-same
 36 | 
 37 | Or you can install the program as a symlink to the source file in the developer directory:
 38 | 
 39 |     .../same-same $ ./install-dev.sh
 40 | 
 41 | ## Configure Git
 42 | 
 43 | Then update your git configuration:
 44 | 
 45 |     $ git config --global core.pager 'same-same | LESSANSIENDCHARS=mK less --RAW-CONTROL-CHARS'
 46 |     $ git config --global interactive.diffFilter 'same-same -interactive | LESSANSIENDCHARS=mK less --RAW-CONTROL-CHARS'
 47 | 
 48 | Or edit your `~/.gitconfig` or the project's `.git/config` by hand:
 49 | 
 50 |     [core]
 51 |       pager = same-same | LESSANSIENDCHARS=mK less --RAW-CONTROL-CHARS
 52 |     [interactive]
 53 |       diffFilter = same-same -interactive | LESSANSIENDCHARS=mK less --RAW-CONTROL-CHARS
 54 | 
 55 | As an alternative or in addition to `core.pager`, you can set any of `pager.log`, `pager.show`, and `pager.diff` to use different highlighter/pager combinations for the various git commands.
 56 | 
 57 | 
 58 | # Debugging
 59 | 
 60 | If `same-same` misbehaves, please report the problem (with a repro if possible) as a GitHub issue.
 61 | 
 62 | To put `same-same` in passthrough mode, set the environment variable `SAME_SAME_OFF`.
 63 | 
 64 | To put `same-same` in debug mode (just classify each line, then print its kind and repr), set `SAME_SAME_DBG`.
 65 | 
 66 | 
 67 | # Algorithms
 68 | 
 69 | Same-same does two things that are interesting algorithmically: movement detection, and per-token highlighting.
 70 | 
 71 | ## Movement detection
 72 | 
 73 | The movement detector runs first, as follows:
 74 | * Find all unique removed lines and unique added lines, ignoring whitespace (this allows for detection even with changed indentation).
 75 | * For each unique line that was both added and removed:
 76 |   * Greedily expand the moved block backwards and forwards, again ignoring whitespace but also allowing non-unique lines to match.
 77 | 
 78 | ## Per-token highlighting
 79 | 
 80 | Per-token highlighting is performed per "chunk", where a chunk is a block of consecutive removed and/or added lines. This is in contrast to git's notion of a "hunk", which also includes context lines. For each chunk:
 81 | * Assemble a list of tokens for each side, including newline tokens.
 82 | * Diff the lists on a token-by-token basis.
 83 |   * Currently we use Python's builtin `difflib.SequenceMatcher` algorithm, and treat non-newline whitespace tokens as junk (a feature of `SequenceMatcher`).
 84 |   * Anecdotally, I think that the "Patience" algorithm produces better diffs than `difflib` does, and Git's "histogram" algorithm is purported to be better still. I would love to find the time to write (or collaborate on!) a public domain implementation.
 85 | * Iterate over the resulting diff sequence and output the sequence of tokens with highlight colors inserted.
 86 | * Replace the text for each line with highlighted sequences.
 87 | 
 88 | 
 89 | # Notes
 90 | 
 91 | I learned some interesting things that may be helpful for others creating git highlighters, other git tools, and tools with color console output.
 92 | 
 93 | ## Git can do line movement detection
 94 | 
 95 | `git diff` has a new `--color-moved[=<mode>]` option to detect line movement, and offers several modes. Same-same ignores all colors in the diff and does its own movement detection. This is partly out of simplicity, but also to allow for experimentation and improvements to the movement detection algorithm.
 96 | 
 97 | ## Limitation of Git's interactive mode
 98 | 
 99 | `git add -p` (interactive staging) works by slicing the diff by line positions. Therefore if a highlighter omits or inserts lines, then the output will get sliced incorrectly and will not make sense. For this reason, `same-same -interactive` disables the omission of unhelpful metadata lines and dims them instead.
100 | 
101 | ## Coloring to end-of-line
102 | 
103 | Some terminals, including Apple Terminal, make it difficult to set the background color of a complete line without printing trailing spaces to fill the screen. Filling with spaces requires querying for the terminal width, and is an unacceptable hack because it cannot cope with a resized terminal window. [This StackOverflow answer](https://stackoverflow.com/a/20058323) shows how to use the [ANSI CSI sequence](https://en.wikipedia.org/wiki/ANSI_escape_code#CSI_sequences) for "erase to end-of-line" (`'\x1b[K'` or `'\x1b[0K'`) to get Terminal to highlight from the current position to the right margin.
104 | 
105 | This trick allows Same-same to use a different highlighting style than GitHub: the background color is used to indicate the line-by-line diff, and the text color is used to indicate the per-token diff.
106 | 
107 | ## Getting Less to respect the clear-to-eol trick
108 | 
109 | By default, `less` will strip out the "erase" code used above, but the `--RAW-CONTROL-CHARS` option, in conjunction with the `LESSANSIENDCHARS=mK` environment variable, tells it to leave both SGR (color) and erasure sequences in the text stream.
110 | 
111 | 
112 | # Contribution
113 | 
114 | Contributors are welcome! Please get in touch via GitHub issues or email to discuss.
115 | 
116 | ## Configurable colors
117 | 
118 | One obvious addition would be to query `os.environ` for custom colors.
119 | 


--------------------------------------------------------------------------------
/same-same.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | # Dedicated to the public domain under CC0: https://creativecommons.org/publicdomain/zero/1.0/.
  3 | 
  4 | import re
  5 | from argparse import ArgumentParser
  6 | from difflib import SequenceMatcher
  7 | from itertools import chain, groupby
  8 | from os import environ
  9 | from sys import stderr, stdout
 10 | from typing import *
 11 | from typing import Match
 12 | 
 13 | 
 14 | class DiffLine:
 15 |   def __init__(self, kind:str, match:Match, rich_text:str) -> None:
 16 |     self.kind = kind # The name from `diff_pat` named capture groups.
 17 |     self.match = match
 18 |     self.rich_text = rich_text # Original colorized text from git.
 19 |     self.old_num = 0 # 1-indexed.
 20 |     self.new_num = 0 # ".
 21 |     self.chunk_idx = 0 # Positive for rem/add.
 22 |     self.is_src = False # True for ctx/rem/add.
 23 |     self.text = '' # Final text for ctx/rem/add.
 24 | 
 25 |   @property
 26 |   def plain_text(self) -> str:
 27 |     return self.match.string # type: ignore
 28 | 
 29 | 
 30 | def main() -> None:
 31 | 
 32 |   arg_parser = ArgumentParser(prog='same-same', description='Git diff filter.')
 33 |   arg_parser.add_argument('-interactive', action='store_true', help="Accommodate git's interactive mode.")
 34 |   args = arg_parser.parse_args()
 35 | 
 36 |   # Git can generate utf8-illegal sequences; ignore them.
 37 |   stdin = open(0, errors='replace')
 38 | 
 39 |   if 'SAME_SAME_OFF' in environ:
 40 |     for line in stdin:
 41 |       stdout.write(line)
 42 |     exit(0)
 43 | 
 44 |   dbg = ('SAME_SAME_DBG' in environ)
 45 | 
 46 |   buffer:List[DiffLine] = []
 47 | 
 48 |   def flush_buffer() -> None:
 49 |     nonlocal buffer
 50 |     if buffer:
 51 |       handle_file_lines(buffer, interactive=args.interactive)
 52 |       buffer = []
 53 | 
 54 |   try:
 55 |     for rich_text in stdin:
 56 |       rich_text = rich_text.rstrip('\n')
 57 |       plain_text = sgr_pat.sub('', rich_text) # remove colors.
 58 |       match = diff_pat.match(plain_text)
 59 |       assert match is not None
 60 |       kind = match.lastgroup
 61 |       assert kind is not None, match
 62 |       if dbg:
 63 |         print(kind, ':', repr(plain_text))
 64 |         continue
 65 |       if kind == 'diff':
 66 |         flush_buffer()
 67 |       buffer.append(DiffLine(kind, match, rich_text))
 68 |     flush_buffer()
 69 |   except BrokenPipeError:
 70 |     stderr.close() # Prevents warning message.
 71 | 
 72 | 
 73 | def handle_file_lines(lines:List[DiffLine], interactive:bool) -> None:
 74 |   first = lines[0]
 75 |   kind = first.kind
 76 |   skip = False
 77 | 
 78 |   # Detect if we should skip these lines.
 79 |   if kind not in ('diff', 'loc'): skip = True
 80 |   elif graph_pat.match(first.plain_text).end(): skip = True # type: ignore
 81 |   if skip:
 82 |     for line in lines: print(line.rich_text)
 83 |     return
 84 | 
 85 |   old_ctx_nums:Set[int] = set() # Line numbers of context lines.
 86 |   new_ctx_nums:Set[int] = set() # ".
 87 |   old_lines:Dict[int, DiffLine] = {} # Maps of line numbers to line structs.
 88 |   new_lines:Dict[int, DiffLine] = {} # ".
 89 |   old_uniques:Dict[str, Optional[int]] = {} # Maps unique line bodies to line numbers.
 90 |   new_uniques:Dict[str, Optional[int]] = {} # ".
 91 |   old_num = 0 # 1-indexed source line number.
 92 |   new_num = 0 # ".
 93 |   chunk_idx = 0 # Counter to differentiate chunks; becomes part of the groupby key.
 94 | 
 95 |   # Accumulate source lines into structures.
 96 |   old_path = '<OLD_PATH>'
 97 |   new_path = '<NEW_PATH>'
 98 |   is_prev_add_rem = False
 99 |   for line in lines:
100 |     match = line.match
101 |     kind = line.kind
102 |     is_add_rem = (kind in ('rem', 'add'))
103 |     if not is_prev_add_rem and is_add_rem: chunk_idx += 1
104 |     is_prev_add_rem = is_add_rem
105 |     if kind in ('ctx', 'rem', 'add'):
106 |       line.is_src = True
107 |       if kind == 'ctx':
108 |         line.text = match['ctx_text']
109 |       elif kind == 'rem':
110 |         line.text = match['rem_text']
111 |         line.chunk_idx = chunk_idx
112 |         insert_unique_line(old_uniques, line.text, old_num)
113 |       elif kind == 'add':
114 |         line.text = match['add_text']
115 |         line.chunk_idx = chunk_idx
116 |         insert_unique_line(new_uniques, line.text, new_num)
117 |       if kind in ('ctx', 'rem'):
118 |         assert old_num not in old_lines
119 |         assert old_num not in old_ctx_nums
120 |         line.old_num = old_num
121 |         old_lines[old_num] = line
122 |         old_ctx_nums.add(old_num)
123 |         old_num += 1
124 |       if kind in ('ctx', 'add'):
125 |         assert new_num not in new_lines
126 |         assert new_num not in new_ctx_nums
127 |         line.new_num = new_num
128 |         new_lines[new_num] = line
129 |         new_ctx_nums.add(new_num)
130 |         new_num += 1
131 |     elif kind == 'loc':
132 |       o = int(match['old_num'])
133 |       if o > 0:
134 |         assert o > old_num, (o, old_num, match.string)
135 |         old_num = o
136 |       n = int(match['new_num'])
137 |       if n > 0:
138 |         assert n > new_num
139 |         new_num = n
140 |     elif kind == 'old': old_path = vscode_path(match['old_path'].rstrip('\t'))
141 |     elif kind == 'new': new_path = vscode_path(match['new_path'].rstrip('\t')) # Not sure why this trailing tab appears.
142 | 
143 |   # Detect moved lines.
144 | 
145 |   def diff_lines_match(old_idx:int, new_idx:int) -> bool:
146 |     if old_idx in old_ctx_nums or new_idx in new_ctx_nums: return False
147 |     try: return old_lines[old_idx].text.strip() == new_lines[new_idx].text.strip()
148 |     except KeyError: return False
149 | 
150 |   old_moved_nums:Set[int] = set()
151 |   new_moved_nums:Set[int] = set()
152 |   for body, new_idx in new_uniques.items():
153 |     if new_idx is None: continue
154 |     old_idx = old_uniques.get(body)
155 |     if old_idx is None: continue
156 |     p_o = old_idx
157 |     p_n = new_idx
158 |     while diff_lines_match(p_o-1, p_n-1):
159 |       p_o -= 1
160 |       p_n -= 1
161 |     e_o = old_idx + 1
162 |     e_n = new_idx + 1
163 |     while diff_lines_match(e_o, e_n):
164 |       e_o += 1
165 |       e_n += 1
166 |     old_moved_nums.update(range(p_o, e_o))
167 |     new_moved_nums.update(range(p_n, e_n))
168 | 
169 |   # Break lines into rem/add chunks.
170 |   # While a "hunk" is a series of (possibly many) ctx/rem/add lines provided by git diff,
171 |   # a "chunk" is either a contiguous block of rem/add lines, or else any other single line.
172 |   # This approach simplifies the token diffing process so that it is a reasonably
173 |   # straightforward comparison of a rem block to an add block.
174 | 
175 |   def chunk_key(line:DiffLine) -> Tuple[int, bool]:
176 |     return (line.is_src, line.chunk_idx, (line.old_num in old_moved_nums or line.new_num in new_moved_nums))
177 | 
178 |   for ((is_src, chunk_idx, is_moved), _chunk) in groupby(lines, key=chunk_key):
179 |     chunk = list(_chunk) # We iterate over the sequence several times.
180 |     if chunk_idx and not is_moved: # Chunk should be diffed by tokens.
181 |       # We must ensure that the same number of lines is output, at least for `-interactive` mode.
182 |       # Currently, we do not reorder lines at all, but that is an option for the future.
183 |       rem_lines = [l for l in chunk if l.old_num]
184 |       add_lines = [l for l in chunk if l.new_num]
185 |       add_token_diffs(rem_lines, add_lines)
186 |     elif is_src: # ctx or moved.
187 |       for l in chunk:
188 |         l.text = highlight_strange_chars(l.text)
189 | 
190 |     # Print lines.
191 |     for line in chunk:
192 |       kind = line.kind
193 |       match = line.match
194 |       text = line.text
195 |       if kind == 'ctx':
196 |         print(text)
197 |       elif kind == 'rem':
198 |         m = C_REM_MOVED if line.old_num in old_moved_nums else ''
199 |         print(C_REM_LINE, m, text, C_END, sep='')
200 |       elif kind == 'add':
201 |         m = C_ADD_MOVED if line.new_num in new_moved_nums else ''
202 |         print(C_ADD_LINE, m, text, C_END, sep='')
203 |       elif kind == 'loc':
204 |         new_num = match['new_num']
205 |         snippet = match['parent_snippet']
206 |         s = ' ' + C_SNIPPET if snippet else ''
207 |         print(C_LOC, new_path, ':', new_num, ':', s, snippet, C_END, sep='')
208 |       elif kind == 'diff':
209 |         msg = new_path if (old_path == new_path) else '{} -> {}'.format(old_path, new_path)
210 |         print(C_FILE, msg, ':', C_END, sep='')
211 |       elif kind == 'meta':
212 |         print(C_MODE, new_path, ':', RST, ' ', line.rich_text, sep='')
213 |       elif kind in dropped_kinds:
214 |         if interactive: # cannot drop lines, becasue interactive mode slices the diff by line counts.
215 |           print(C_DROPPED, line.plain_text, RST, sep='')
216 |       elif kind in pass_kinds:
217 |         print(line.rich_text)
218 |       else:
219 |         raise Exception('unhandled kind: {}\n{!r}'.format(kind, text))
220 | 
221 | 
222 | def insert_unique_line(d:Dict[str, Optional[int]], line:str, idx:int) -> None:
223 |   'For the purpose of movement detection, lines are tested for uniqueness after stripping leading and trailing whitespace.'
224 |   body = line.strip()
225 |   if body in d: d[body] = None
226 |   else: d[body] = idx
227 | 
228 | 
229 | def add_token_diffs(rem_lines:List[DiffLine], add_lines:List[DiffLine]) -> None:
230 |   'Rewrite DiffLine.text values to include per-token diff highlighting.'
231 |   # Get lists of tokens for the entire chunk.
232 |   r_tokens = tokenize_difflines(rem_lines)
233 |   a_tokens = tokenize_difflines(add_lines)
234 |   m = SequenceMatcher(isjunk=is_token_junk, a=r_tokens, b=a_tokens, autojunk=True)
235 |   r_frags:List[List[str]] = [[] for _ in rem_lines] # Accumulate highlighted tokens.
236 |   a_frags:List[List[str]] = [[] for _ in add_lines]
237 |   r_line_idx = 0 # Step through the accumulators.
238 |   a_line_idx = 0
239 |   r_d = 0 # Token index of previous/next diff.
240 |   a_d = 0
241 |   # TODO: r_lit, a_lit flags could slightly reduce emission of color sequences.
242 |   blocks = m.get_matching_blocks() # last block is the sentinel: (len(a), len(b), 0).
243 |   for r_p, a_p, l in m.get_matching_blocks():
244 |     # Highlight the differing tokens.
245 |     r_line_idx = append_frags(r_frags, r_tokens, r_line_idx, r_d, r_p, C_REM_TOKEN)
246 |     a_line_idx = append_frags(a_frags, a_tokens, a_line_idx, a_d, a_p, C_ADD_TOKEN)
247 |     r_d = r_p+l # update to end of match / beginning of next diff.
248 |     a_d = a_p+l
249 |     # Do not highlight the matching tokens.
250 |     r_line_idx = append_frags(r_frags, r_tokens, r_line_idx, r_p, r_d, C_RST_TOKEN)
251 |     a_line_idx = append_frags(a_frags, a_tokens, a_line_idx, a_p, a_d, C_RST_TOKEN)
252 |   for rem_line, frags in zip(rem_lines, r_frags):
253 |     rem_line.text = ''.join(frags)
254 |   for add_line, frags in zip(add_lines, a_frags):
255 |     add_line.text = ''.join(frags)
256 | 
257 | 
258 | def tokenize_difflines(lines:List[DiffLine]) -> List[str]:
259 |   'Convert the list of line texts into a single list of tokens, including newline tokens.'
260 |   tokens:List[str] = []
261 |   for i, line in enumerate(lines):
262 |     if i: tokens.append('\n')
263 |     tokens.extend(m[0] for m in token_pat.finditer(line.text))
264 |   return tokens
265 | 
266 | 
267 | def is_token_junk(token:str) -> bool:
268 |   '''
269 |   Treat newlines as tokens, but all other whitespace as junk.
270 |   This forces the diff algorithm to respect line breaks but not get distracted aligning to whitespace.
271 |   '''
272 |   return token.isspace() and token != '\n'
273 | 
274 | 
275 | def append_frags(frags:List[List[str]], tokens:List[str], line_idx:int, pos:int, end:int, highlight:str) -> int:
276 |   for frag in tokens[pos:end]:
277 |     if frag == '\n':
278 |       line_idx += 1
279 |     else:
280 |       line_frags = frags[line_idx]
281 |       line_frags.append(highlight)
282 |       line_frags.append(highlight_strange_chars(frag))
283 |   return line_idx
284 | 
285 | 
286 | def highlight_strange_chars(string:str) -> str:
287 |   return strange_char_pat.sub(
288 |     lambda m: '{}{}{}'.format(C_STRANGE, m[0].translate(strange_char_trans_table), C_RST_STRANGE),
289 |     string)
290 | 
291 | 
292 | dropped_kinds = {
293 |   'idx', 'old', 'new'
294 | }
295 | 
296 | pass_kinds = {
297 |   'empty', 'other'
298 | }
299 | 
300 | 
301 | sgr_pat = re.compile(r'\x1B\[[0-9;]*m')
302 | 
303 | graph_pat = re.compile(r'(?x) [ /\*\|\\]*') # space is treated as literal inside of brackets, even in extended mode.
304 | 
305 | diff_pat = re.compile(r'''(?x)
306 | (?:
307 |   (?P<empty> $)
308 | | (?P<commit>   commit\ [0-9a-z]{40} )
309 | | (?P<author>   Author: )
310 | | (?P<date>     Date:   )
311 | | (?P<diff>     diff\ --git )
312 | | (?P<idx>      index   )
313 | | (?P<old>      ---     \ (?P<old_path>.+) )
314 | | (?P<new>      \+\+\+  \ (?P<new_path>.+) )
315 | | (?P<loc>      @@\ -(?P<old_num>\d+)(?P<old_len>,\d+)?\ \+(?P<new_num>\d+)(?P<new_len>,\d+)?\ @@\ ?(?P<parent_snippet>.*) )
316 | | (?P<ctx>      \  (?P<ctx_text>.*) )
317 | | (?P<rem>      -  (?P<rem_text>.*) )
318 | | (?P<add>      \+ (?P<add_text>.*) )
319 | | (?P<meta>
320 |   ( old\ mode
321 |   | new\ mode
322 |   | deleted\ file\ mode
323 |   | new\ file\ mode
324 |   | copy\ from
325 |   | copy\ to
326 |   | rename\ from
327 |   | rename\ to
328 |   | similarity\ index
329 |   | dissimilarity\ index ) )
330 | | (?P<other> .* )
331 | )
332 | ''')
333 | 
334 | 
335 | token_pat = re.compile(r'''(?x)
336 |   \w[\w\d]* # Symbol token.
337 | | \d+ # Number token.
338 | | \ + # Spaces; distinct from other whitespace.
339 | | \t+ # Tabs; distinct from other whitespace.
340 | | \s+ # Other whitespace.
341 | | . # Any other single character; newlines are never present so DOTALL is irrelevant.
342 | ''')
343 | 
344 | 
345 | # Unicode ranges for strange characters:
346 | # C0:   \x00 - \x1F
347 | # \n:   \x0A
348 | # C0 !\n: [ \x00-\x09 \x0B-\x1F ]
349 | # SP:   \x20
350 | # DEL:  \x7F
351 | # C1:   \x80 - \x9F
352 | # NBSP: \xA0 (nonbreaking space)
353 | # SHY:  \xAD (soft hyphen)
354 | strange_char_re = r'(?x) [\x00-\x09\x0B-\x1F\x7F\x80-\x9F\xA0\xAD]+'
355 | strange_char_pat = re.compile(strange_char_re)
356 | assert not strange_char_pat.match(' ')
357 | 
358 | strange_char_ords = chain(range(0, 0x09+1), range(0x0B, 0x1F+1), range(0x7F, 0x7F+1),
359 |   range(0x80, 0x9F+1), range(0xA0, 0xA0+1), range(0xAD, 0xAD+1))
360 | assert ord(' ') not in strange_char_ords
361 | strange_char_names = { i : '\\x{:02x}'.format(i) for i in strange_char_ords }
362 | strange_char_names.update({
363 |   '\0' : '\\0',
364 |   '\a' : '\\a',
365 |   '\b' : '\\b',
366 |   '\f' : '\\f',
367 |   '\r' : '\\r',
368 |   '\t' : '\\t',
369 |   '\v' : '\\v',
370 | })
371 | 
372 | strange_char_trans_table = str.maketrans(strange_char_names)
373 | 
374 | 
375 | # ANSI control sequence indicator.
376 | CSI = '\x1b['
377 | 
378 | ERASE_LINE_F = CSI + 'K' # Sending erase line forward while background color is set colors to end of line.
379 | 
380 | def sgr(*codes:Any) -> str:
381 |   'Select Graphic Rendition control sequence string.'
382 |   code = ';'.join(str(c) for c in codes)
383 |   return '\x1b[{}m'.format(code)
384 | 
385 | RST = sgr()
386 | 
387 | RST_BOLD, RST_ULINE, RST_BLINK, RST_INVERT, RST_TXT, RST_BG = (22, 24, 25, 27, 39, 49)
388 | 
389 | BOLD, ULINE, BLINK, INVERT = (1, 4, 5, 7)
390 | 
391 | 
392 | # xterm-256 sequence initiators; these should be followed by a single color index.
393 | # both text and background can be specified in a single sgr call.
394 | TXT = '38;5'
395 | BG = '48;5'
396 | 
397 | # RGB6 color cube: 6x6x6, from black to white.
398 | K = 16  # black.
399 | W = 231 # white.
400 | 
401 | # Grayscale: the 24 palette values have a suggested 8 bit grayscale range of [8, 238].
402 | middle_gray_indices = range(232, 256)
403 | 
404 | def gray26(n:int) -> int:
405 |   assert 0 <= n < 26
406 |   if n == 0: return K
407 |   if n == 25: return W
408 |   return W + n
409 | 
410 | def rgb6(r:int, g:int, b:int) -> int:
411 |   'index RGB triples into the 256-color palette (returns 16 for black, 231 for white).'
412 |   assert 0 <= r < 6
413 |   assert 0 <= g < 6
414 |   assert 0 <= b < 6
415 |   return (((r * 6) + g) * 6) + b + 16
416 | 
417 | 
418 | # same-same colors.
419 | 
420 | C_FILE = sgr(BG, rgb6(1, 0, 1))
421 | C_MODE = sgr(BG, rgb6(1, 0, 1))
422 | C_LOC = sgr(BG, rgb6(0, 1, 2))
423 | C_UNKNOWN = sgr(BG, rgb6(5, 0, 5))
424 | C_SNIPPET = sgr(TXT, gray26(22))
425 | C_DROPPED = sgr(TXT, gray26(10))
426 | 
427 | C_REM_LINE = sgr(BG, rgb6(1, 0, 0))
428 | C_ADD_LINE = sgr(BG, rgb6(0, 1, 0))
429 | C_REM_MOVED = sgr(TXT, rgb6(4, 2, 0))
430 | C_ADD_MOVED = sgr(TXT, rgb6(2, 4, 0))
431 | C_REM_TOKEN = sgr(TXT, rgb6(5, 2, 3), BOLD)
432 | C_ADD_TOKEN = sgr(TXT, rgb6(2, 5, 3), BOLD)
433 | 
434 | C_RST_TOKEN = sgr(RST_TXT, RST_BOLD)
435 | 
436 | C_STRANGE = sgr(INVERT)
437 | C_RST_STRANGE = sgr(RST_INVERT)
438 | 
439 | C_END = ERASE_LINE_F + RST
440 | 
441 | 
442 | def vscode_path(path:str) -> str:
443 |   'VSCode will only recognize source locations if the path contains a slash; add "./" to plain file names.'
444 |   if '/' in path or '<' in path or '>' in path: return path # Do not alter pseudo-names like <stdin>.
445 |   return './' + path
446 | 
447 | def errL(*items:Any) -> None: print(*items, sep='', file=stderr)
448 | 
449 | def errSL(*items:Any) -> None: print(*items, file=stderr)
450 | 
451 | 
452 | if __name__ == '__main__': main()
453 | 


--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
 1 | # Dedicated to the public domain under CC0: https://creativecommons.org/publicdomain/zero/1.0/.
 2 | 
 3 | [build]
 4 | build-base=_build
 5 | 
 6 | [sdist]
 7 | dist-dir=_build
 8 | 
 9 | [mypy]
10 | python_version = 3.6
11 | cache_dir = _build/mypy_cache
12 | 
13 | check_untyped_defs = True
14 | disallow_untyped_calls = True
15 | disallow_untyped_defs = False
16 | disallow_subclassing_any = True
17 | show_column_numbers = True
18 | show_none_errors = True
19 | strict_optional = True
20 | strict_boolean = False
21 | warn_no_return = True
22 | warn_redundant_casts = True
23 | warn_return_any = True
24 | warn_unused_ignores = True
25 | warn_incomplete_stub = True
26 | 


--------------------------------------------------------------------------------