├── pit ├── __init__.py ├── commands │ ├── __init__.py │ ├── init.py │ ├── base.py │ ├── add.py │ ├── branch.py │ ├── checkout.py │ ├── log.py │ ├── commit.py │ ├── status.py │ └── diff.py ├── constants.py ├── pager.py ├── hunk.py ├── revesion.py ├── exceptions.py ├── values.py ├── database.py ├── migration.py ├── refs.py ├── tree_diff.py ├── repository.py ├── diff.py ├── git_object.py └── index.py ├── .gitignore ├── Readme.md ├── git-objects.py └── pit.py /pit/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pit/commands/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .idea 2 | __pycache__ 3 | -------------------------------------------------------------------------------- /pit/commands/init.py: -------------------------------------------------------------------------------- 1 | from pit.commands.base import BaseCommand 2 | 3 | 4 | class InitCommand(BaseCommand): 5 | def run(self): 6 | self.repo.database.init() 7 | self.repo.refs.init() 8 | -------------------------------------------------------------------------------- /pit/constants.py: -------------------------------------------------------------------------------- 1 | IGNORE = {".git"} 2 | 3 | 4 | class Color: 5 | RED = "\033[31m" 6 | GREEN = "\033[32m" 7 | WHITE = "\033[97m" 8 | BOLD = "\033[1m" 9 | CYAN = "\033[36m" 10 | YELLOW = "\033[33m" 11 | RESET_ALL = "\033[0m" 12 | -------------------------------------------------------------------------------- /pit/commands/base.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | from pit.repository import Repository 4 | 5 | 6 | class BaseCommand: 7 | def __init__(self, root_dir: str): 8 | self.root_dir = Path(root_dir) 9 | self.repo = Repository(root_dir) 10 | 11 | def run(self): 12 | raise NotImplementedError 13 | 14 | -------------------------------------------------------------------------------- /Readme.md: -------------------------------------------------------------------------------- 1 | # Pit 2 | 3 | A pure Python implementation for Git based on `Buliding Git` 4 | 5 | ## Features 6 | 7 | Supported commands: 8 | * pit init 9 | * pit add `` 10 | * pit commit -m `` 11 | * pit status 12 | * pit status --porcelain 13 | * pit diff 14 | * pit diff --cached 15 | * pit branch -v 16 | * pit branch `` `` 17 | * pit branch -D `` 18 | * pit checkout `/` 19 | * pit log 20 | * pit log --oneline 21 | 22 | -------------------------------------------------------------------------------- /pit/pager.py: -------------------------------------------------------------------------------- 1 | import subprocess 2 | import sys 3 | from contextlib import contextmanager 4 | 5 | 6 | @contextmanager 7 | def pager(): 8 | try: 9 | "-F for quit-if-one-screen" 10 | "To prevent less from clearing the screen upon exit, use -X" 11 | pager = subprocess.Popen( 12 | ["less", "-c", "-R", "-S", "-K"], 13 | stdin=subprocess.PIPE, 14 | stdout=sys.stdout, 15 | text=True, 16 | ) 17 | sys.stdout = pager.stdin 18 | 19 | yield 20 | 21 | pager.stdin.close() 22 | pager.wait() 23 | except KeyboardInterrupt: 24 | pass 25 | -------------------------------------------------------------------------------- /pit/hunk.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass, field 2 | 3 | from pit.constants import Color 4 | from pit.diff import Line, Edit 5 | 6 | 7 | @dataclass 8 | class Hunk: 9 | a_start: int 10 | b_start: int 11 | edits: list[Edit] = field(default_factory=list) 12 | 13 | CONTEXT = 3 14 | 15 | def header(self): 16 | a_lines = len([edit for edit in self.edits if edit.a_line]) 17 | b_lines = len([edit for edit in self.edits if edit.b_line]) 18 | return f"{Color.CYAN}@@ -{self.a_start},{a_lines} +{self.b_start},{b_lines} @@{Color.RESET_ALL}" 19 | 20 | @classmethod 21 | def filters(cls, edits: list[Edit]): 22 | i = 0 23 | hunks = [] 24 | while i < len(edits): 25 | while i < len(edits) and edits[i].type == " ": 26 | i += 1 27 | 28 | if i >= len(edits): 29 | break 30 | 31 | i -= cls.CONTEXT 32 | hunk = ( 33 | Hunk(0, 0) 34 | if i < 0 35 | else Hunk(edits[i].a_line.number, edits[i].b_line.number) 36 | ) 37 | 38 | i = max(i, 0) 39 | while edits[i].type == " ": 40 | hunk.edits.append(edits[i]) 41 | i += 1 42 | equals = 0 43 | while equals <= 2 and i < len(edits): 44 | if edits[i].type == " ": 45 | equals += 1 46 | else: 47 | equals = 0 48 | hunk.edits.append(edits[i]) 49 | i += 1 50 | hunks.append(hunk) 51 | 52 | return hunks 53 | -------------------------------------------------------------------------------- /pit/commands/add.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | from pit.commands.base import BaseCommand 4 | from pit.git_object import Blob 5 | from pit.values import GitPath 6 | 7 | 8 | class AddCommand(BaseCommand): 9 | def __init__(self, root_dir: str, *, paths: list[str]): 10 | super().__init__(root_dir) 11 | self.paths = paths 12 | 13 | def run(self): 14 | if not self.paths: 15 | return 16 | for path in self.paths: 17 | git_path = GitPath(path, self.root_dir) 18 | if not git_path.path.exists() and str(git_path) not in self.repo.index.entries: 19 | print(f"fatal: pathspec '{path}' did not match any files") 20 | return 21 | 22 | for path in self.paths: 23 | path = Path(path) 24 | if not path.exists(): 25 | self.repo.index.remove_file(path) 26 | continue 27 | if path.is_dir(): 28 | for sub_path in path.rglob("*"): 29 | if self._should_ignore(sub_path): 30 | continue 31 | if sub_path.is_file(): 32 | self.repo.database.store(Blob(sub_path.read_bytes())) 33 | self.repo.index.add_file(sub_path) 34 | else: 35 | self.repo.database.store(Blob(path.read_bytes())) 36 | self.repo.index.add_file(path) 37 | 38 | for deleted in self.repo.status.workspace_deleted: 39 | self.repo.index.remove_file(deleted) 40 | self.repo.database.store_index(self.repo.index) 41 | 42 | 43 | def _should_ignore(self, path: Path): 44 | return any(ignore in path.parts for ignore in self.repo.ignores) 45 | -------------------------------------------------------------------------------- /git-objects.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | from pathlib import Path 3 | import itertools 4 | import re 5 | import zlib 6 | import sys 7 | 8 | 9 | @dataclass 10 | class GitObject: 11 | hash: str 12 | type: str 13 | length: int 14 | content: bytes 15 | 16 | def display(self, mode: str, max_strlen: int = 88): 17 | display = f"<{self.type.capitalize()}> {self.length} {self.hash}" 18 | match (mode, self.type): 19 | case ("full", "blob") if self.length > max_strlen: 20 | display += f" : {self.content[:max_strlen]}..." 21 | case ("full", _): 22 | display += f" : {self.content}" 23 | case _: 24 | pass 25 | return display 26 | 27 | 28 | def show_git_objects(mode: str, project: str): 29 | project = Path(project) 30 | objects_dir = project / ".git/objects" 31 | objects = list( 32 | itertools.chain( 33 | *[[f"{d.name}{f.name}" for f in d.iterdir()] for d in objects_dir.iterdir() if d.name not in {"info", "pack"}] 34 | ) 35 | ) 36 | git_objects = [] 37 | for obj in objects: 38 | obj_file = objects_dir / obj[:2] / obj[2:] 39 | obj_content = zlib.decompress(obj_file.read_bytes()) 40 | type, length, *_ = re.split(b" |\x00", obj_content, maxsplit=2) 41 | type, length = type.decode(), int(length) 42 | git_objects.append( 43 | GitObject(hash=obj, type=type, length=length, content=obj_content) 44 | ) 45 | git_objects.sort(key=lambda x: {'commit': 1, 'tree': 2, 'blob': 3}[x.type]) 46 | for obj in git_objects: 47 | print(obj.display(mode=mode)) 48 | 49 | 50 | if __name__ == "__main__": 51 | show_git_objects(sys.argv[-1], sys.argv[-2]) 52 | -------------------------------------------------------------------------------- /pit/revesion.py: -------------------------------------------------------------------------------- 1 | import re 2 | 3 | from pit.exceptions import UnknownRevision, InvalidRevision 4 | from pit.git_object import Commit 5 | from pit.repository import Repository 6 | 7 | 8 | class AstNode: 9 | pass 10 | 11 | 12 | class Revision: 13 | REF_MATCH = r"[^\^\~]+" 14 | ANCESTOR_MATCH = r"([~\^]+)(\d*)" 15 | 16 | @classmethod 17 | def resolve(cls, revision: str, *, repo: Repository) -> str: 18 | ref, parents = cls._parse(revision) 19 | if ref.lower() in ("head", "@"): 20 | ref = repo.refs.read_head() 21 | elif ref in repo.refs.list_branches(): 22 | ref = repo.refs.read_branch(ref) 23 | else: 24 | ref = repo.database.prefix_match(ref) 25 | 26 | commit = repo.database.load(ref) 27 | if not isinstance(commit, Commit): 28 | raise InvalidRevision(revision) 29 | 30 | for _ in range(parents): 31 | ref = commit.parent_oid 32 | if not ref: 33 | raise UnknownRevision(revision) 34 | 35 | commit = repo.database.load(ref) 36 | if not isinstance(commit, Commit): 37 | raise InvalidRevision(revision) 38 | 39 | return ref 40 | 41 | @classmethod 42 | def _parse(cls, expr: str) -> (str, int): 43 | matched = re.match(cls.REF_MATCH, expr) 44 | start_ref = matched.group() 45 | remain = expr[matched.end() :] 46 | 47 | parents = 0 48 | while remain: 49 | if matched := re.match(cls.ANCESTOR_MATCH, remain): 50 | parents += len(matched.group(1)) 51 | if matched.group(2): 52 | parents += int(matched.group(2)) - 1 53 | remain = remain[matched.end() :] 54 | else: 55 | raise InvalidRevision(expr) 56 | return start_ref, parents 57 | -------------------------------------------------------------------------------- /pit/exceptions.py: -------------------------------------------------------------------------------- 1 | class PitError(Exception): 2 | pass 3 | 4 | 5 | class InvalidBranchName(PitError): 6 | def __init__(self, branch_name: str): 7 | self.branch_name = branch_name 8 | 9 | def __str__(self): 10 | return f"fatal: '{self.branch_name}' is not a valid branch name." 11 | 12 | 13 | class BranchAlreadyExists(PitError): 14 | def __init__(self, branch_name: str): 15 | self.branch_name = branch_name 16 | 17 | def __str__(self): 18 | return f"fatal: A branch named '{self.branch_name}' already exists." 19 | 20 | 21 | class InvalidRevision(PitError): 22 | def __init__(self, revision: str): 23 | self.revision = revision 24 | 25 | def __str__(self): 26 | return f"fatal: Not a valid object name: {self.revision}." 27 | 28 | 29 | class UnknownRevision(PitError): 30 | def __init__(self, revision: str): 31 | self.revision = revision 32 | 33 | def __str__(self): 34 | return f"""fatal: ambiguous argument '{self.revision}': unknown revision or path not in the working tree. 35 | Use '--' to separate paths from revisions, like this: 36 | 'git [...] -- [...]'""" 37 | 38 | 39 | class AmbiguousRevision(PitError): 40 | def __init__(self, revision: str): 41 | self.revision = revision 42 | 43 | def __str__(self): 44 | return f"""error: short SHA1 {self.revision} is ambiguous 45 | fatal: Not a valid object name: '{self.revision}'""" 46 | 47 | 48 | class CheckoutConflict(PitError): 49 | def __init__(self, conflicts: list[str]): 50 | self.conflicts = conflicts 51 | 52 | def __str__(self): 53 | files = "\n".join([f" {path}" for path in self.conflicts]) 54 | return f"""error: The following untracked working tree files would be overwritten by checkout: 55 | {files} 56 | Please move or remove them before you switch branches. 57 | Aborting""" 58 | -------------------------------------------------------------------------------- /pit/values.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass, InitVar 2 | from functools import cached_property 3 | from pathlib import Path 4 | 5 | from pit.exceptions import InvalidBranchName 6 | 7 | 8 | @dataclass(order=True) 9 | class GitPath: 10 | path: str | Path 11 | root_dir: InitVar[str | Path] 12 | 13 | def __post_init__(self, root_dir: str | Path): 14 | self.path = Path(self.path).absolute().relative_to(Path(root_dir).absolute()) 15 | 16 | def __str__(self): 17 | return f"{self.path if not self.path.exists() or self.path.is_file() else f'{self.path}/'}" 18 | 19 | 20 | @dataclass(frozen=True) 21 | class GitFileMode: 22 | mode: int 23 | 24 | def __bytes__(self): 25 | mode = "{0:o}".format(self.mode) 26 | 27 | if mode.startswith("4"): 28 | return b"40000" 29 | 30 | if mode[3] == "6": 31 | return b"100644" 32 | 33 | # if mode[3] == b"7": 34 | return b"100755" 35 | 36 | def is_dir(self) -> bool: 37 | return bytes(self) == b"40000" 38 | 39 | def is_file(self) -> bool: 40 | return not self.is_dir() 41 | 42 | @classmethod 43 | def from_raw(cls, raw: bytes) -> "GitFileMode": 44 | return GitFileMode(int(raw, 8)) 45 | 46 | @classmethod 47 | def dir(cls) -> int: 48 | return int(b"40000", 8) 49 | 50 | 51 | @dataclass(frozen=True) 52 | class AuthorSign: 53 | name: str 54 | email: str 55 | timestamp: int 56 | timezone: str 57 | 58 | def __bytes__(self): 59 | return b"%s <%s> %d %s" % ( 60 | self.name.encode(), 61 | self.email.encode(), 62 | self.timestamp, 63 | self.timezone.encode(), 64 | ) 65 | 66 | 67 | @dataclass() 68 | class ObjectId: 69 | object_id: str 70 | 71 | @cached_property 72 | def short_id(self): 73 | return self.object_id[:7] 74 | 75 | 76 | @dataclass() 77 | class BranchName: 78 | name: str 79 | 80 | def __post_init__(self): 81 | if self.name.startswith("."): 82 | self._raise() 83 | if self.name.endswith(".lock"): 84 | self._raise() 85 | if any(s in self.name for s in ("^", "~", "/", "..", "@{")): 86 | self._raise() 87 | 88 | def _raise(self): 89 | raise InvalidBranchName(self.name) 90 | 91 | def __str__(self): 92 | return self.name 93 | -------------------------------------------------------------------------------- /pit/commands/branch.py: -------------------------------------------------------------------------------- 1 | from pit.constants import Color 2 | from pit.exceptions import ( 3 | InvalidBranchName, 4 | BranchAlreadyExists, 5 | InvalidRevision, 6 | UnknownRevision, 7 | ) 8 | from pit.commands.base import BaseCommand 9 | from pit.pager import pager 10 | from pit.revesion import Revision 11 | from pit.values import ObjectId 12 | 13 | 14 | class BranchCommand(BaseCommand): 15 | def __init__( 16 | self, 17 | root_dir: str, 18 | *, 19 | name: str, 20 | revision: str, 21 | delete: bool = False, 22 | verbose: bool = False, 23 | ): 24 | super().__init__(root_dir) 25 | self.name = name 26 | self.revision = revision 27 | self.delete = delete 28 | self.verbose = verbose 29 | 30 | def run(self): 31 | if self.delete: 32 | self._delete_branch() 33 | return 34 | 35 | if not self.name and not self.revision: 36 | with pager(): 37 | self._list_branches() 38 | return 39 | 40 | try: 41 | if self.revision: 42 | oid = Revision.resolve(self.revision, repo=self.repo) 43 | else: 44 | oid = self.repo.refs.read_head() 45 | self.repo.refs.create_branch(self.name, oid) 46 | except ( 47 | InvalidBranchName, 48 | BranchAlreadyExists, 49 | InvalidRevision, 50 | UnknownRevision, 51 | ) as e: 52 | print(e) 53 | 54 | def _list_branches(self): 55 | current_branch = self.repo.refs.current_branch() 56 | for branch in self.repo.refs.list_branches(): 57 | verbose = "" 58 | if self.verbose: 59 | commit = self.repo.database.load(self.repo.refs.read_branch(branch)) 60 | verbose = f"{ObjectId(commit.oid).short_id} {commit.message}" 61 | 62 | if branch == current_branch: 63 | print(f"* {Color.GREEN}{branch}{Color.RESET_ALL} {verbose}") 64 | else: 65 | print(f" {branch} {verbose}") 66 | 67 | def _delete_branch(self): 68 | if not self.name: 69 | print("fatal: branch name required") 70 | return 71 | 72 | if self.name not in self.repo.refs.list_branches(): 73 | print(f"error: branch '{self.name}' not found.") 74 | return 75 | 76 | self.repo.refs.delete_branch(self.name) 77 | -------------------------------------------------------------------------------- /pit/database.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | from functools import cached_property 3 | from pathlib import Path 4 | import zlib 5 | 6 | from pit.exceptions import InvalidRevision, AmbiguousRevision 7 | from pit.git_object import GitObject, Tree, Commit, Blob 8 | from pit.index import Index 9 | 10 | 11 | @dataclass 12 | class ObjectPath: 13 | oid: str 14 | root_dir: Path 15 | 16 | @cached_property 17 | def path(self) -> Path: 18 | return self.root_dir / ".git/objects" / self.oid[:2] / self.oid[2:] 19 | 20 | def load(self) -> GitObject: 21 | raw = zlib.decompress(self.path.read_bytes()) 22 | match raw.split(b' ')[0]: 23 | case b'tree': 24 | return Tree.from_raw(raw) 25 | case b'commit': 26 | return Commit.from_raw(raw) 27 | case b'blob': 28 | return Blob.from_raw(raw) 29 | case _: 30 | raise NotImplementedError 31 | 32 | 33 | class Database: 34 | def __init__(self, root_dir: Path): 35 | self.root_dir = root_dir 36 | self.git_dir = self.root_dir / ".git" 37 | self.objects_dir = self.git_dir / "objects" 38 | self.index_path = self.git_dir / "index" 39 | 40 | def init(self): 41 | self.objects_dir.mkdir(parents=True, exist_ok=True) 42 | 43 | def has_exists(self, object_id: str) -> bool: 44 | return ObjectPath(object_id, self.root_dir).path.exists() 45 | 46 | def store(self, obj: GitObject): 47 | object_path = ObjectPath(obj.oid, self.root_dir).path 48 | if object_path.exists(): 49 | return 50 | object_path.parent.mkdir(parents=True, exist_ok=True) 51 | object_path.write_bytes(zlib.compress(bytes(obj))) 52 | 53 | def load(self, object_id: str) -> GitObject: 54 | object_id = ObjectPath(object_id, self.root_dir) 55 | return object_id.load() 56 | 57 | def prefix_match(self, prefix_oid: str) -> str: 58 | if len(prefix_oid) < 2: 59 | raise InvalidRevision(prefix_oid) 60 | 61 | prefix_dir = self.objects_dir / prefix_oid[:2] 62 | if not prefix_dir.exists(): 63 | raise InvalidRevision(prefix_oid) 64 | 65 | objects = list(prefix_dir.glob(f'{prefix_oid[2:]}*')) 66 | if not objects: 67 | raise InvalidRevision(prefix_oid) 68 | if len(objects) >=2 : 69 | raise AmbiguousRevision(prefix_oid) 70 | 71 | return f'{prefix_oid[:2]}{objects[0].name}' 72 | 73 | def store_index(self, index: Index): 74 | self.index_path.parent.mkdir(parents=True, exist_ok=True) 75 | self.index_path.write_bytes(bytes(index)) 76 | -------------------------------------------------------------------------------- /pit/commands/checkout.py: -------------------------------------------------------------------------------- 1 | from pit.commands.base import BaseCommand 2 | from pit.exceptions import CheckoutConflict 3 | from pit.git_object import Commit 4 | from pit.migration import Migration 5 | from pit.revesion import Revision 6 | from pit.tree_diff import TreeDiff 7 | from pit.values import ObjectId 8 | 9 | 10 | class CheckoutCommand(BaseCommand): 11 | def __init__(self, root_dir: str, *, revision: str): 12 | super().__init__(root_dir) 13 | self.revision = revision 14 | 15 | def run(self): 16 | head = self.repo.refs.read_head() 17 | before_commit = self.repo.database.load(head) 18 | 19 | oid = Revision.resolve(self.revision, repo=self.repo) 20 | after_commit = self.repo.database.load(oid) 21 | 22 | diff = TreeDiff.diff( 23 | before_commit.tree_oid, after_commit.tree_oid, repo=self.repo 24 | ) 25 | try: 26 | Migration(self.repo).apply(diff) 27 | except CheckoutConflict as e: 28 | print(e) 29 | return 30 | 31 | if self.revision in self.repo.refs.list_branches(): 32 | if self.repo.refs.current_branch() == self.revision: 33 | 34 | print(f"Already on '{self.revision}'") 35 | return 36 | if self.repo.refs.is_detached(): 37 | self._show_detached_head_warning(before_commit) 38 | self.repo.refs.update_head(branch=self.revision) 39 | else: 40 | self.repo.refs.update_head(oid=after_commit.oid) 41 | self._show_switch_to_detached_head_warning(after_commit) 42 | 43 | def _show_detached_head_warning(self, before_commit: Commit): 44 | print( 45 | f"""Previous HEAD position was {ObjectId(before_commit.oid).short_id} {before_commit.message} 46 | Switched to branch '{self.revision}'""" 47 | ) 48 | 49 | def _show_switch_to_detached_head_warning(self, after_commit: Commit): 50 | message = f"""Note: switching to '{after_commit.oid}'. 51 | You are in 'detached HEAD' state. You can look around, make experimental 52 | changes and commit them, and you can discard any commits you make in this 53 | state without impacting any branches by switching back to a branch. 54 | 55 | If you want to create a new branch to retain commits you create, you may 56 | do so (now or later) by using -c with the switch command. Example: 57 | 58 | git switch -c 59 | 60 | Or undo this operation with: 61 | 62 | git switch - 63 | 64 | Turn off this advice by setting config variable advice.detachedHead to false 65 | 66 | HEAD is now at {ObjectId(after_commit.oid).short_id} {after_commit.message}""" 67 | print(message) 68 | -------------------------------------------------------------------------------- /pit/commands/log.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | from collections import defaultdict 3 | from functools import cached_property 4 | 5 | from pit.commands.base import BaseCommand 6 | from pit.constants import Color 7 | from pit.git_object import Commit 8 | from pit.values import ObjectId 9 | 10 | 11 | class LogCommand(BaseCommand): 12 | def __init__(self, root_dir: str, *, oneline: bool): 13 | super().__init__(root_dir) 14 | self.oneline = oneline 15 | 16 | @cached_property 17 | def head(self): 18 | return self.repo.refs.read_head() 19 | 20 | @cached_property 21 | def oid2branches(self) -> dict[str, list[str]]: 22 | oid2branch = defaultdict(list) 23 | 24 | for branch in self.repo.refs.list_branches(): 25 | oid2branch[self.repo.refs.read_branch(branch)].append(branch) 26 | return oid2branch 27 | 28 | def run(self): 29 | if not self.head: 30 | return 31 | 32 | for commit in self._find_parent(self.head): 33 | self._show_commit(commit) 34 | 35 | def _show_commit(self, commit: Commit): 36 | decoration = self._generate_decoration(commit.oid) 37 | if self.oneline: 38 | self._show_oneline(commit, decoration) 39 | else: 40 | self._show_medium(commit, decoration) 41 | 42 | def _generate_decoration(self, oid: str) -> str: 43 | branches = self.oid2branches.get(oid) 44 | if not branches: 45 | return "" 46 | 47 | coloreds = [] 48 | for branch in branches: 49 | colored = f"{Color.GREEN}{Color.BOLD}{branch}{Color.RESET_ALL}" 50 | coloreds.append(colored) 51 | joined_branches = ", ".join(coloreds) 52 | if self.head == oid: 53 | joined_branches = ( 54 | f"{Color.CYAN}{Color.BOLD}HEAD -> {Color.RESET_ALL}{joined_branches}" 55 | ) 56 | return f"{Color.YELLOW}({Color.RESET_ALL}{joined_branches}{Color.YELLOW}){Color.RESET_ALL} " 57 | 58 | def _show_medium(self, commit: Commit, decoration: str): 59 | message = "\n ".join(commit.message.split("\n")) 60 | print( 61 | f"""{Color.YELLOW}commit {commit.oid}{Color.RESET_ALL} {decoration} 62 | Author: {commit.author.name} <{commit.author.email}> 63 | Date: {datetime.datetime.fromtimestamp(commit.author.timestamp).strftime("%a %b %d %H:%M:%S %Y")} {commit.author.timezone} 64 | 65 | {message} 66 | """ 67 | ) 68 | 69 | def _show_oneline(self, commit: Commit, decoration: str): 70 | print( 71 | f"""{Color.YELLOW}{ObjectId(commit.oid).short_id}{Color.RESET_ALL} {decoration}{commit.title}""" 72 | ) 73 | 74 | def _find_parent(self, parent: str): 75 | commit: Commit = self.repo.database.load(parent) 76 | yield commit 77 | if commit.parent_oid: 78 | yield from self._find_parent(commit.parent_oid) 79 | -------------------------------------------------------------------------------- /pit/migration.py: -------------------------------------------------------------------------------- 1 | import os 2 | from pathlib import Path 3 | 4 | from pit.exceptions import CheckoutConflict 5 | from pit.git_object import TreeEntry 6 | from pit.index import IndexEntry 7 | from pit.repository import Repository 8 | from pit.tree_diff import TreeDiff, Added, Deleted, Updated 9 | 10 | 11 | class Migration: 12 | def __init__(self, repo: Repository): 13 | self.repo = repo 14 | 15 | def apply(self, tree_diff: dict[str, Added | Deleted | Updated]): 16 | conflicts = [] 17 | added = {} 18 | deleted = {} 19 | updated = {} 20 | for path, diff in tree_diff.items(): 21 | path = Path(path) 22 | match diff: 23 | case Added(): 24 | if path.exists(): 25 | conflicts.append(str(path)) 26 | else: 27 | added[path] = diff 28 | case Deleted(): 29 | if self._detect_deleted_conflict(path, diff): 30 | conflicts.append(str(path)) 31 | else: 32 | deleted[path] = diff 33 | case Updated(): 34 | if self._detect_updated_conflict(path, diff): 35 | conflicts.append(str(path)) 36 | else: 37 | updated[path] = diff 38 | case _: 39 | pass 40 | if conflicts: 41 | raise CheckoutConflict(conflicts) 42 | 43 | # delete first 44 | for path, diff in deleted.items(): 45 | path.unlink(missing_ok=True) 46 | self.repo.index.remove_file(path) 47 | if not any(os.scandir(path.parent)): 48 | path.parent.rmdir() 49 | 50 | for path, diff in added.items(): 51 | path.parent.mkdir(parents=True, exist_ok=True) 52 | path.write_bytes(self.repo.database.load(diff.entry.oid).content) 53 | self.repo.index.add_file(path) 54 | for path, diff in updated.items(): 55 | path.write_bytes(self.repo.database.load(diff.after.oid).content) 56 | path.chmod(diff.after.mode) 57 | self.repo.index.add_file(path) 58 | 59 | self.repo.database.store_index(self.repo.index) 60 | 61 | def _detect_deleted_conflict(self, path: Path, deleted: Deleted): 62 | if not path.exists(): 63 | return True 64 | current = IndexEntry.from_file(path).to_tree_entry() 65 | if (current.mode != deleted.entry.mode 66 | or current.oid != deleted.entry.oid 67 | ): 68 | return True 69 | return False 70 | 71 | def _detect_updated_conflict(self, path: Path, updated: Updated): 72 | if not path.exists(): 73 | return True 74 | current = IndexEntry.from_file(path).to_tree_entry() 75 | if current.mode != updated.before.mode or current.oid != updated.before.oid: 76 | return True 77 | return False 78 | -------------------------------------------------------------------------------- /pit/refs.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | from pit.exceptions import BranchAlreadyExists 4 | from pit.values import BranchName 5 | 6 | 7 | class Refs: 8 | def __init__(self, root_dir: Path): 9 | self.root_dir = root_dir 10 | self.git_dir = self.root_dir / ".git" 11 | self.refs_dir = self.git_dir / "refs/heads" 12 | self.head = self.git_dir / "HEAD" 13 | 14 | def init(self): 15 | self.refs_dir.mkdir(parents=True, exist_ok=True) 16 | self.head.write_text("ref: refs/heads/main") 17 | 18 | def is_detached(self) -> bool: 19 | return not self.head.read_text().startswith("ref: ") 20 | 21 | def update_ref_head(self, oid: str): 22 | ref_path = self._find_ref(self.head) 23 | ref_path.write_text(oid) 24 | 25 | def update_head(self, *, oid: str = None, branch: str = None): 26 | assert oid or branch, "Must provide one of the oid or branch args" 27 | if oid: 28 | self.head.write_text(oid) 29 | else: 30 | self.head.write_text(f"ref: refs/heads/{branch}") 31 | 32 | def read_head(self) -> str | None: 33 | if not self.head.exists(): 34 | return None 35 | return self._find_ref(self.head).read_text().strip() or None 36 | 37 | def read_ref(self, name: str) -> str | None: 38 | ref_path = self.refs_dir / name 39 | if not ref_path.exists(): 40 | return None 41 | return self._find_ref(ref_path).read_text().strip() or None 42 | 43 | def _find_ref(self, ref_path: Path) -> Path: 44 | if not ref_path.exists(): 45 | ref_path.parent.mkdir(parents=True, exist_ok=True) 46 | ref_path.write_text("") 47 | return ref_path 48 | 49 | ref = ref_path.read_text().strip() 50 | if not ref.startswith("ref: "): 51 | return ref_path 52 | child = self.git_dir / ref[5:] 53 | return self._find_ref(child) 54 | 55 | def create_branch(self, name: str, oid: str): 56 | branch_name = BranchName(name) 57 | branch_path = self.refs_dir / str(branch_name) 58 | if branch_path.exists(): 59 | raise BranchAlreadyExists(str(branch_name)) 60 | branch_path.write_text(oid) 61 | 62 | def list_branches(self) -> list[str]: 63 | return [branch.name for branch in self.refs_dir.iterdir()] 64 | 65 | def delete_branch(self, name: str): 66 | branch_path = self.refs_dir / str(name) 67 | branch_path.unlink(missing_ok=True) 68 | 69 | def read_branch(self, name: str) -> str: 70 | branch_name = BranchName(name) 71 | branch_path = self.refs_dir / str(branch_name) 72 | return self._find_ref(branch_path).read_text().strip() 73 | 74 | def current_branch(self) -> str | None: 75 | if self.is_detached(): 76 | return None 77 | return self.head.read_text().split("/")[-1] 78 | 79 | def _write_branch(self, path: Path, oid: str): 80 | path.write_text(oid) 81 | 82 | 83 | if __name__ == "__main__": 84 | refs = Refs(Path(__file__).parent.parent) 85 | print("Current head: ", refs.read_head()) 86 | assert len(refs.read_head()) == 40 87 | print("Branch main head: ", refs.read_head()) 88 | -------------------------------------------------------------------------------- /pit/commands/commit.py: -------------------------------------------------------------------------------- 1 | import os 2 | import time 3 | from collections import defaultdict 4 | from pathlib import Path 5 | 6 | from pit.commands.base import BaseCommand 7 | from pit.git_object import Tree, TreeEntry, Commit 8 | from pit.index import IndexEntry 9 | from pit.values import GitFileMode, AuthorSign 10 | 11 | 12 | class CommitCommand(BaseCommand): 13 | def __init__( 14 | self, root_dir: str, *, author_name: str, author_email: str, commit_msg: str 15 | ): 16 | super().__init__(root_dir) 17 | self.author_name = author_name 18 | self.author_email = author_email 19 | self.commit_msg = commit_msg 20 | 21 | def run(self): 22 | # construct index tree 23 | index_tree: dict[str, defaultdict | IndexEntry] = defaultdict( 24 | lambda: defaultdict(dict) 25 | ) 26 | 27 | def construct_tree(index_entry: IndexEntry): 28 | path = Path(index_entry.file_path) 29 | 30 | tree = index_tree 31 | for part in path.parent.parts: 32 | tree = tree[part] 33 | tree[path.name] = index_entry 34 | 35 | for index_entry in self.repo.index.entries.values(): 36 | construct_tree(index_entry) 37 | 38 | # save git tree object 39 | trees = [] 40 | 41 | def construct_tree_object( 42 | root: dict[str, dict | IndexEntry], 43 | tree: Tree | None, 44 | parents: list[str], 45 | ): 46 | if isinstance(root, IndexEntry): 47 | tree.entries.append(root.to_tree_entry()) 48 | return tree 49 | 50 | sub_tree = Tree(entries=[]) 51 | for part in root: 52 | construct_tree_object(root[part], sub_tree, parents=[*parents, part]) 53 | # Save later for same tree detection when implementing "nothing to commit, working tree clean" 54 | trees.append(sub_tree) 55 | tree.entries.append( 56 | TreeEntry( 57 | oid=sub_tree.oid, 58 | path=os.path.join(*parents), 59 | mode=GitFileMode.dir(), 60 | ) 61 | ) 62 | return tree 63 | 64 | root_tree = construct_tree_object(index_tree, Tree(entries=[]), parents=[""]) 65 | tree_oid = root_tree.entries[0].oid 66 | if ( 67 | self.repo.database.has_exists(tree_oid) 68 | # Also diff from the previous commit's tree oid 69 | and self.repo.refs.read_head() 70 | and self.repo.database.load(self.repo.refs.read_head()).tree_oid == tree_oid 71 | ): 72 | print("nothing to commit, working tree clean") 73 | return 74 | 75 | for tree in trees: 76 | self.repo.database.store(tree) 77 | 78 | commit = Commit( 79 | tree_oid=tree_oid, 80 | author=AuthorSign( 81 | name=self.author_name, 82 | email=self.author_email, 83 | timestamp=int(time.time()), 84 | timezone="+0800", 85 | ), 86 | message=self.commit_msg, 87 | parent_oid=self.repo.refs.read_head(), 88 | ) 89 | self.repo.database.store(commit) 90 | self.repo.refs.update_ref_head(commit.oid) 91 | -------------------------------------------------------------------------------- /pit.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | import subprocess 4 | import sys 5 | 6 | from pit.commands.add import AddCommand 7 | from pit.commands.branch import BranchCommand 8 | from pit.commands.checkout import CheckoutCommand 9 | from pit.commands.commit import CommitCommand 10 | from pit.commands.diff import DiffCommand 11 | from pit.commands.init import InitCommand 12 | from pit.commands.log import LogCommand 13 | from pit.commands.status import StatusCommand 14 | from pit.pager import pager 15 | 16 | 17 | def generate_parser(): 18 | """ 19 | >>> parser = generate_parser() 20 | >>> parser.parse_args(['add', '--all']) 21 | Namespace(all=True, cmd='add') 22 | 23 | """ 24 | parser = argparse.ArgumentParser() 25 | subparsers = parser.add_subparsers(help="sub-command help") 26 | 27 | init_cmd = subparsers.add_parser("init", help="init project") 28 | init_cmd.set_defaults(cmd="init") 29 | 30 | commit_cmd = subparsers.add_parser("commit", help="commit help") 31 | commit_cmd.set_defaults(cmd="commit") 32 | commit_cmd.add_argument('-m', '--message', help="commit message", default='') 33 | 34 | add_cmd = subparsers.add_parser("add", help="add help") 35 | add_cmd.set_defaults(cmd="add") 36 | add_cmd.add_argument('files', nargs='+') 37 | 38 | status_cmd = subparsers.add_parser("status", help="status help") 39 | status_cmd.set_defaults(cmd="status") 40 | status_cmd.add_argument('--porcelain', action="store_true") 41 | 42 | diff_cmd = subparsers.add_parser("diff", help="diff help") 43 | diff_cmd.set_defaults(cmd="diff") 44 | diff_cmd.add_argument('--cached', action="store_true") 45 | 46 | branch_cmd = subparsers.add_parser("branch", help="branch help") 47 | branch_cmd.set_defaults(cmd="branch") 48 | branch_cmd.add_argument('name', nargs='?') 49 | branch_cmd.add_argument('revision', nargs='?', default=None) 50 | branch_cmd.add_argument('-D', '--delete', action='store_true') 51 | branch_cmd.add_argument('-v', '--verbose', action='store_true') 52 | 53 | checkout_cmd = subparsers.add_parser("checkout", help="checkout help") 54 | checkout_cmd.set_defaults(cmd="checkout") 55 | checkout_cmd.add_argument('revision', nargs='?', default=None) 56 | 57 | log_cmd = subparsers.add_parser("log", help="log help") 58 | log_cmd.add_argument('--oneline', action='store_true') 59 | log_cmd.set_defaults(cmd="log") 60 | 61 | return parser 62 | 63 | 64 | def entrypoint(): 65 | args = generate_parser().parse_args() 66 | root_dir = os.getcwd() 67 | match args.cmd: 68 | case "init": 69 | InitCommand(root_dir).run() 70 | case "commit": 71 | CommitCommand(root_dir, 72 | author_name=os.getenv("GIT_AUTHOR_NAME"), 73 | author_email=os.getenv("GIT_AUTHOR_EMAIL"), 74 | commit_msg=args.message).run() 75 | case "add": 76 | AddCommand(root_dir, paths=args.files).run() 77 | case "status": 78 | StatusCommand(root_dir, porcelain=args.porcelain).run() 79 | case "diff": 80 | with pager(): 81 | DiffCommand(root_dir, cached=args.cached).run() 82 | case "branch": 83 | BranchCommand(root_dir, name=args.name, revision=args.revision, delete=args.delete, verbose=args.verbose).run() 84 | case "checkout": 85 | CheckoutCommand(root_dir, revision=args.revision).run() 86 | case "log": 87 | with pager(): 88 | LogCommand(root_dir, oneline=args.oneline).run() 89 | case _: 90 | print('Unsupported command: ', args.cmd) 91 | 92 | 93 | if __name__ == "__main__": 94 | entrypoint() 95 | -------------------------------------------------------------------------------- /pit/tree_diff.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | from pathlib import Path 3 | 4 | from pit.git_object import TreeEntry 5 | from pit.repository import Repository 6 | 7 | 8 | @dataclass 9 | class Added: 10 | entry: TreeEntry 11 | 12 | 13 | @dataclass 14 | class Deleted: 15 | entry: TreeEntry 16 | 17 | 18 | @dataclass 19 | class Updated: 20 | before: TreeEntry 21 | after: TreeEntry 22 | 23 | 24 | class TreeDiff: 25 | """Diff between two commits""" 26 | 27 | @classmethod 28 | def diff( 29 | cls, before: str, after: str, *, repo: Repository 30 | ) -> dict[str, (TreeEntry, TreeEntry)]: 31 | return { 32 | str(path): entry 33 | for path, entry in cls._diff( 34 | before, after, parent=Path(""), repo=repo 35 | ).items() 36 | } 37 | 38 | @classmethod 39 | def _diff( 40 | cls, 41 | before: str | None, 42 | after: str | None, 43 | parent: Path, 44 | repo: Repository, 45 | ) -> dict[Path, (TreeEntry, TreeEntry)]: 46 | before = repo.database.load(before) if before else None 47 | after = repo.database.load(after) if after else None 48 | 49 | before_children = ( 50 | {child.path: child for child in before.entries} if before else {} 51 | ) 52 | after_children = {child.path: child for child in after.entries} if after else {} 53 | 54 | changes = {} 55 | for added in set(after_children) - set(before_children): 56 | added_child = after_children[added] 57 | if added_child.is_dir(): 58 | changes.update(cls._diff(None, added_child.oid, parent / added, repo)) 59 | else: 60 | changes[parent / added] = Added(added_child) 61 | for deleted in set(before_children) - set(after_children): 62 | deleted_child = before_children[deleted] 63 | if deleted_child.is_dir(): 64 | changes.update( 65 | cls._diff(deleted_child.oid, None, parent / deleted, repo) 66 | ) 67 | else: 68 | changes[parent / deleted] = Deleted(deleted_child) 69 | 70 | modified = [ 71 | child 72 | for child in set(before_children) & set(after_children) 73 | if before_children[child] != after_children[child] 74 | ] 75 | for changed in modified: 76 | before_child, after_child = ( 77 | before_children[changed], 78 | after_children[changed], 79 | ) 80 | if not before_child.is_dir() and not after_child.is_dir(): 81 | changes[parent / changed] = Updated( 82 | before_child, 83 | after_child, 84 | ) 85 | elif before_child.is_dir() and after_child.is_dir(): 86 | changes.update( 87 | cls._diff( 88 | before_child.oid, 89 | after_child.oid, 90 | parent / before_child.path, 91 | repo, 92 | ) 93 | ) 94 | elif before_child.is_dir(): 95 | changes[parent / changed] = Added(after_child) 96 | changes.update( 97 | cls._diff( 98 | before_child.oid, 99 | None, 100 | parent / before_child.path, 101 | repo, 102 | ) 103 | ) 104 | else: 105 | changes[parent / changed] = Deleted(after_child) 106 | changes.update( 107 | cls._diff( 108 | None, 109 | after_child.oid, 110 | parent / after_child.path, 111 | repo, 112 | ) 113 | ) 114 | return changes 115 | -------------------------------------------------------------------------------- /pit/commands/status.py: -------------------------------------------------------------------------------- 1 | from pit.commands.base import BaseCommand 2 | from pit.constants import Color 3 | from pit.repository import FileStatusGroup 4 | from pit.values import GitPath 5 | 6 | 7 | class StatusCommand(BaseCommand): 8 | def __init__(self, root_dir: str, *, porcelain: bool): 9 | super().__init__(root_dir) 10 | self.porcelain = porcelain 11 | 12 | def run(self): 13 | print( 14 | self._display_porcelain(self.repo.status) 15 | if self.porcelain 16 | else self._display_long_format(self.repo.status) 17 | ) 18 | 19 | def _display_porcelain(self, status: FileStatusGroup) -> str: 20 | lines = [] 21 | for path in ( 22 | status.workspace_added 23 | | status.workspace_modified 24 | | status.workspace_deleted 25 | | status.index_deleted 26 | | status.index_modified 27 | | status.index_added 28 | ): 29 | git_path = GitPath(path, root_dir=status.root_dir) 30 | lines.append(f"{self._status_code(path, status)} {git_path}") 31 | lines.sort(key=lambda x: "zz" if x[:2] == "??" else x[3:]) 32 | return "\n".join(lines) 33 | 34 | def _display_long_format(self, status: FileStatusGroup) -> str: 35 | lines = [] 36 | if status.index_added or status.index_modified or status.index_deleted: 37 | lines.append("Changes to be committed:") 38 | lines.append(' (use "git restore --staged ..." to unstage)') 39 | for path in sorted( 40 | status.index_deleted | status.index_modified | status.index_added 41 | ): 42 | git_path = GitPath(path, root_dir=status.root_dir) 43 | lines.append( 44 | f"\t\x1b[7;30;42m{self._status_txt(path, status)} {git_path}\x1b[0m" 45 | ) 46 | 47 | if status.workspace_deleted or status.workspace_modified: 48 | lines.append("\nChanges not staged for commit:") 49 | lines.append( 50 | ' (use "git add/rm ..." to update what will be committed)' 51 | ) 52 | lines.append( 53 | ' (use "git restore ..." to discard changes in working directory)' 54 | ) 55 | for path in sorted(status.workspace_deleted | status.workspace_modified): 56 | git_path = GitPath(path, root_dir=status.root_dir) 57 | lines.append( 58 | f"\t{Color.RED}{self._status_txt(path, status)} {git_path}{Color.RESET_ALL}" 59 | ) 60 | 61 | if status.workspace_added: 62 | lines.append("\nUntracked files:") 63 | lines.append( 64 | ' (use "git add ..." to include in what will be committed)' 65 | ) 66 | for path in sorted(status.workspace_added): 67 | git_path = GitPath(path, root_dir=status.root_dir) 68 | lines.append(f"\t{Color.RED}{git_path}{Color.RESET_ALL}") 69 | if not lines: 70 | return "nothing to commit, working directory clean" 71 | return "".join(["\n".join(lines), "\n"]) 72 | 73 | def _status_txt(self, file_path: str, status: FileStatusGroup) -> str: 74 | if file_path in status.workspace_added or file_path in status.index_added: 75 | return "new file:" 76 | if file_path in status.workspace_modified or file_path in status.index_modified: 77 | return "modified:" 78 | if file_path in status.workspace_deleted or file_path in status.index_deleted: 79 | return "deleted: " 80 | raise NotImplementedError 81 | 82 | def _status_code(self, file_path: str, status: FileStatusGroup) -> str: 83 | codes = [" ", " "] 84 | if file_path in status.workspace_added and file_path not in status.index_added: 85 | return "??" 86 | if file_path in status.workspace_modified: 87 | codes[1] = "M" 88 | elif file_path in status.workspace_added: 89 | codes[1] = "?" 90 | elif file_path in status.workspace_deleted: 91 | codes[1] = "D" 92 | 93 | if file_path in status.index_modified: 94 | codes[0] = "M" 95 | elif file_path in status.index_added: 96 | codes[0] = "A" 97 | elif file_path in status.index_deleted: 98 | codes[0] = "D" 99 | return "".join(codes) 100 | -------------------------------------------------------------------------------- /pit/repository.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass, field 2 | from functools import cached_property 3 | from pathlib import Path 4 | 5 | from pit.constants import IGNORE 6 | from pit.database import Database 7 | from pit.git_object import Commit, Tree, TreeEntry 8 | from pit.index import Index 9 | from pit.refs import Refs 10 | from pit.values import GitFileMode 11 | 12 | 13 | @dataclass() 14 | class FileStatusGroup: 15 | root_dir: Path 16 | head_tree: dict[str, TreeEntry] = field(default_factory=dict) 17 | workspace_modified: set[str] = field(default_factory=set) 18 | workspace_added: set[str] = field(default_factory=set) 19 | workspace_deleted: set[str] = field(default_factory=set) 20 | 21 | index_modified: set[str] = field(default_factory=set) 22 | index_added: set[str] = field(default_factory=set) 23 | index_deleted: set[str] = field(default_factory=set) 24 | 25 | 26 | class Repository: 27 | def __init__(self, root_dir: str): 28 | self.root_dir = Path(root_dir) 29 | 30 | @cached_property 31 | def index(self): 32 | return Index(self.root_dir) 33 | 34 | @cached_property 35 | def database(self): 36 | return Database(self.root_dir) 37 | 38 | @cached_property 39 | def refs(self): 40 | return Refs(self.root_dir) 41 | 42 | @cached_property 43 | def ignores(self) -> list[str]: 44 | ignore_file = Path(self.root_dir) / ".gitignore" 45 | if not ignore_file.exists(): 46 | return [".git"] 47 | git_ignores = {str(p).strip() for p in ignore_file.read_text().split("\n") if p} 48 | return list(git_ignores | IGNORE) 49 | 50 | @cached_property 51 | def status(self) -> FileStatusGroup: 52 | status = FileStatusGroup(root_dir=self.root_dir) 53 | 54 | # check workspace / index differences 55 | existed_files = set() 56 | for path in self.root_dir.rglob("*"): 57 | path = path.relative_to(self.root_dir) 58 | existed_files.add(str(path)) 59 | if any(ignore in path.parts for ignore in self.ignores): 60 | continue 61 | if path.is_dir(): 62 | continue 63 | if self.index.has_tracked(path): 64 | if self.index.has_modified(path): 65 | status.workspace_modified.add(str(path)) 66 | continue 67 | 68 | if len(path.parts) == 1: 69 | status.workspace_added.add(str(path)) 70 | else: 71 | for parent in reversed(path.parents[:-1]): 72 | if str(parent) in status.workspace_added: 73 | break 74 | if not self.index.has_tracked(parent): 75 | status.workspace_added.add(str(parent)) 76 | break 77 | else: 78 | status.workspace_added.add(str(path)) 79 | for index_file in self.index.entries: 80 | if index_file not in existed_files: 81 | status.workspace_deleted.add(index_file) 82 | 83 | # check index / commit differences 84 | if self.refs.read_head(): 85 | # noinspection PyTypeChecker 86 | commit: Commit = self.database.load(self.refs.read_head()) 87 | # noinspection PyTypeChecker 88 | tree: Tree = self.database.load(commit.tree_oid) 89 | 90 | def flatten_tree( 91 | tree_entries: list[TreeEntry], parent: str = None 92 | ) -> list[TreeEntry]: 93 | flatten = [] 94 | for entry in tree_entries: 95 | entry_path = f"{parent}/{entry.path}" if parent else entry.path 96 | if GitFileMode(entry.mode).is_file(): 97 | entry.path = entry_path 98 | flatten.append(entry) 99 | continue 100 | # noinspection PyTypeChecker 101 | sub_tree: Tree = self.database.load(entry.oid) 102 | flatten.extend( 103 | flatten_tree( 104 | sub_tree.entries, 105 | parent=entry_path, 106 | ) 107 | ) 108 | return flatten 109 | 110 | flatten_entries = flatten_tree(tree.entries) 111 | commit_entries: dict[str, TreeEntry] = {e.path: e for e in flatten_entries} 112 | else: 113 | commit_entries = {} 114 | status.head_tree = commit_entries 115 | 116 | for entry_path, index_entry in self.index.entries.items(): 117 | commit_entry = commit_entries.get(entry_path) 118 | if not commit_entry: 119 | status.index_added.add(entry_path) 120 | continue 121 | if ( 122 | index_entry.file_hash.hex() != commit_entry.oid 123 | or index_entry.mode != commit_entry.mode 124 | ): 125 | status.index_modified.add(entry_path) 126 | continue 127 | for entry_path in commit_entries.keys(): 128 | if entry_path not in self.index.entries: 129 | status.index_deleted.add(entry_path) 130 | return status 131 | -------------------------------------------------------------------------------- /pit/diff.py: -------------------------------------------------------------------------------- 1 | from collections import namedtuple 2 | from copy import copy, deepcopy 3 | from dataclasses import dataclass 4 | 5 | from pit.constants import Color 6 | 7 | 8 | @dataclass() 9 | class Line: 10 | number: int 11 | text: str | bytes 12 | 13 | def __str__(self): 14 | return self.text.decode() if isinstance(self.text, bytes) else self.text 15 | 16 | 17 | @dataclass 18 | class Edit: 19 | TYPE_COLORS = {"-": Color.RED, "+": Color.GREEN, " ": ""} 20 | type: str 21 | a_line: Line | None 22 | b_line: Line | None 23 | 24 | def __str__(self): 25 | return f"{self.TYPE_COLORS[self.type]}{self.type} {self.a_line or self.b_line}{Color.RESET_ALL}" 26 | 27 | 28 | class Diff: 29 | def __init__(self, a: str | bytes | list, b: str | bytes | list): 30 | self.a = [Line(i + 1, line) for i, line in enumerate(a)] 31 | self.b = [Line(i + 1, line) for i, line in enumerate(b)] 32 | 33 | @classmethod 34 | def from_lines(cls, a: bytes, b: bytes) -> "Diff": 35 | return Diff(a.split(b"\n"), b.split(b"\n")) 36 | 37 | def diff(self): 38 | diff = [] 39 | for ((prev_x, prev_y), (x, y)) in self.backtrack(): 40 | if prev_x == x: 41 | diff.append(Edit("+", a_line=None, b_line=self.b[prev_y])) 42 | elif prev_y == y: 43 | diff.append(Edit("-", a_line=self.a[prev_x], b_line=None)) 44 | else: 45 | diff.append(Edit(" ", a_line=self.a[prev_x], b_line=self.b[prev_y])) 46 | diff.reverse() 47 | return diff 48 | 49 | def shortest_edit(self): 50 | n, m = len(self.a), len(self.b) 51 | max_ = n + m 52 | v: list[int | None] = [None] * (2 * max_ + 1) 53 | v[1] = 0 54 | 55 | trace = [] 56 | for d in range(max_ + 1): 57 | trace.append(v.copy()) 58 | for k in range(-d, d + 1, 2): 59 | # k 为 -d 时,表明一直沿着 y 轴向下移动, 60 | # 又因为 k = x - y,所以 x 不变,y + 1 导致 k_end = x - (y + 1) = k_start - 1 61 | # => k_start = k_end + 1 => x_end = x_start = v[k_end + 1] 62 | if k == -d: 63 | x = v[k + 1] 64 | # k 为 d 时,表明一直沿着 x 轴向右移动, 65 | # 又因为 k = x - y,所以 x + 1,y 不变 导致 k_end = x + 1 - y = k_start + 1 66 | # => k_start = k_end - 1 => x_end = x_start + 1 = v[k_start] + 1 = v[k_end - 1] + 1 67 | elif k == d: 68 | x = v[k - 1] + 1 69 | # k 处于 -d 和 d 中间时,表明当前位置可能由原有位置右移(删除)或下移(增加)而来, 70 | # 又因为我们优先删除,所以当可能右移的位置的 x 值 >= 可能下移的位置时,果断选择右移 71 | # 除非选择下移的位置的 x 值小于右移的 x 值,可以补偿一次下移的损失 72 | elif v[k + 1] > v[k - 1]: 73 | x = v[k + 1] 74 | else: 75 | x = v[k - 1] + 1 76 | # 合并上面的表达式可以简化为 77 | # if k == -d or (k != d and v[k - 1] < v[k + 1]): 78 | # x = v[k + 1] 79 | # else: 80 | # x = v[k - 1] + 1 81 | 82 | y = x - k 83 | # 处理字符相同,可以跳过图中对角线的情况 84 | while x < n and y < m and self.a[x].text == self.b[y].text: 85 | x, y = x + 1, y + 1 86 | v[k] = x 87 | 88 | if x >= n and y >= m: 89 | return trace 90 | 91 | def backtrack(self): 92 | x, y = len(self.a), len(self.b) 93 | for d, v in reversed(list(enumerate(self.shortest_edit()))): 94 | k = x - y 95 | 96 | if k == -d or (k != d and v[k - 1] < v[k + 1]): 97 | prev_k = k + 1 98 | else: 99 | prev_k = k - 1 100 | 101 | prev_x = v[prev_k] 102 | prev_y = prev_x - prev_k 103 | 104 | while x > prev_x and y > prev_y: 105 | yield (x - 1, y - 1), (x, y) 106 | x, y = x - 1, y - 1 107 | 108 | if d > 0: 109 | yield (prev_x, prev_y), (x, y) 110 | x, y = prev_x, prev_y 111 | 112 | def shortest_edit_v2(self): 113 | n, m = len(self.a), len(self.b) 114 | max_ = n + m 115 | 116 | path = namedtuple("path", ["x", "history"]) 117 | histories = {1: path(0, [])} 118 | for d in range(max_ + 1): 119 | for k in range(-d, d + 1, 2): 120 | add = k == -d or (k != d and histories[k - 1].x < histories[k + 1].x) 121 | 122 | prev_x, history = deepcopy( 123 | histories[k + 1] if add else histories[k - 1] 124 | ) 125 | x = prev_x if add else prev_x + 1 126 | y = x - k 127 | if x != 0 or y != 0: 128 | if add: 129 | history.append(Edit("+", None, self.b[y - 1])) 130 | else: 131 | history.append(Edit("-", self.a[x - 1], None)) 132 | # 处理字符相同,可以跳过图中对角线的情况 133 | while x < n and y < m and self.a[x].text == self.b[y].text: 134 | history.append(Edit(" ", self.a[x], self.b[y])) 135 | x, y = x + 1, y + 1 136 | 137 | if x >= n and y >= m: 138 | return history 139 | else: 140 | histories[k] = path(x, history) 141 | 142 | 143 | if __name__ == "__main__": 144 | # for edit in Diff("ABCABBA", "CBABAC").diff(): 145 | # print(edit) 146 | # 《Building Git》中实现的算法没办法正确处理下面的情况, 147 | # 另外一个 Python 实现的版本也有 bug: 148 | # https://gist.github.com/adamnew123456/37923cf53f51d6b9af32a539cdfa7cc4 149 | # 正确实现方式可以见 https://blog.robertelder.org/diff-algorithm/ 150 | for edit in Diff("ACCCAB", "ECCDAB").shortest_edit_v2(): 151 | print(edit) 152 | # for edit in Diff("ACCCAB", "ECCDAB").diff(): 153 | # print(edit) 154 | 155 | """ 156 | - A 157 | - B 158 | C 159 | + B 160 | A 161 | B 162 | - B 163 | A 164 | + C 165 | """ 166 | -------------------------------------------------------------------------------- /pit/commands/diff.py: -------------------------------------------------------------------------------- 1 | import os 2 | import subprocess 3 | import sys 4 | from dataclasses import dataclass 5 | from pathlib import Path 6 | 7 | from pit.commands.base import BaseCommand 8 | from pit.constants import Color 9 | from pit.database import Database 10 | from pit.diff import Diff 11 | from pit.git_object import TreeEntry 12 | from pit.hunk import Hunk 13 | from pit.index import IndexEntry 14 | from pit.values import ObjectId, GitFileMode 15 | 16 | 17 | @dataclass() 18 | class DiffEntry: 19 | DELETED_PATH = "/dev/null" 20 | DELETED_OID = "0" * 40 21 | file_path: str 22 | mode: str 23 | oid: str 24 | data: bytes 25 | 26 | @classmethod 27 | def from_index_entry( 28 | cls, index_entry: IndexEntry, database: Database 29 | ) -> "DiffEntry": 30 | # noinspection PyUnresolvedReferences 31 | data = ( 32 | database.load(index_entry.oid).content 33 | if database.has_exists(index_entry.oid) 34 | else Path(index_entry.file_path).read_bytes() 35 | ) 36 | return DiffEntry( 37 | file_path=index_entry.file_path, 38 | mode=bytes(GitFileMode(index_entry.mode)).decode(), 39 | oid=index_entry.oid, 40 | data=data, 41 | ) 42 | 43 | @classmethod 44 | def from_tree_entry(cls, tree_entry: TreeEntry, database: Database) -> "DiffEntry": 45 | # noinspection PyUnresolvedReferences 46 | return DiffEntry( 47 | file_path=tree_entry.path, 48 | mode=bytes(GitFileMode(tree_entry.mode)).decode(), 49 | oid=tree_entry.oid, 50 | data=database.load(tree_entry.oid).content, 51 | ) 52 | 53 | @classmethod 54 | def from_deleted( 55 | cls, 56 | ) -> "DiffEntry": 57 | return DiffEntry( 58 | file_path=cls.DELETED_PATH, mode="", oid=cls.DELETED_OID, data=b"" 59 | ) 60 | 61 | @property 62 | def short_oid(self) -> str: 63 | return ObjectId(self.oid).short_id 64 | 65 | @property 66 | def exists(self): 67 | return self.file_path != self.DELETED_PATH 68 | 69 | 70 | @dataclass 71 | class DiffHeader: 72 | a_file: DiffEntry 73 | b_file: DiffEntry 74 | 75 | def display(self): 76 | a_file_path = ( 77 | self.a_file.file_path if self.a_file.exists else self.b_file.file_path 78 | ) 79 | b_file_path = ( 80 | self.b_file.file_path if self.b_file.exists else self.a_file.file_path 81 | ) 82 | 83 | color_prefix = f'{Color.WHITE}{Color.BOLD}' 84 | print(f"{color_prefix}diff --git a/{a_file_path} b/{b_file_path}") 85 | # only mode changed 86 | if self.a_file.oid == self.b_file.oid and self.a_file.mode != self.b_file.mode: 87 | print(f"{color_prefix}old mode: ", self.a_file.mode) 88 | print(f"{color_prefix}new mode: ", self.b_file.mode, Color.RESET_ALL) 89 | return 90 | 91 | if not self.a_file.exists: 92 | print(f"{color_prefix}new file mode", self.b_file.mode) 93 | elif not self.b_file.exists: 94 | print(f"{color_prefix}deleted file mode", self.a_file.mode) 95 | 96 | print( 97 | f"{color_prefix}index {self.a_file.short_oid}..{self.b_file.short_oid} {self.a_file.mode if self.b_file.exists else ''}" 98 | ) 99 | print(f"{color_prefix}--- {'a/' if self.a_file.exists else ''}{self.a_file.file_path}") 100 | print( 101 | f"{color_prefix}+++ {'b/' if self.b_file.exists else ''}{self.b_file.file_path}{Color.RESET_ALL}" 102 | ) 103 | for hunk in Hunk.filters(Diff.from_lines(self.a_file.data, self.b_file.data).diff()): 104 | print(hunk.header()) 105 | for edit in hunk.edits: 106 | print(edit) 107 | 108 | 109 | class DiffCommand(BaseCommand): 110 | def __init__(self, root_dir: str, *, cached: bool): 111 | super().__init__(root_dir) 112 | self.cached = cached 113 | 114 | def run(self): 115 | if self.cached: 116 | self._diff_head_index() 117 | else: 118 | self._diff_index_workspace() 119 | 120 | def _diff_head_index(self): 121 | for file_path in sorted( 122 | self.repo.status.index_added 123 | | self.repo.status.index_modified 124 | | self.repo.status.index_deleted 125 | ): 126 | head_entry = ( 127 | self.repo.status.head_tree[file_path] 128 | if file_path not in self.repo.status.index_added 129 | else None 130 | ) 131 | index_entry = ( 132 | self.repo.index.entries[file_path] 133 | if file_path not in self.repo.status.index_deleted 134 | else None 135 | ) 136 | 137 | DiffHeader( 138 | a_file=DiffEntry.from_tree_entry(head_entry, self.repo.database) 139 | if head_entry 140 | else DiffEntry.from_deleted(), 141 | b_file=DiffEntry.from_index_entry(index_entry, self.repo.database) 142 | if index_entry 143 | else DiffEntry.from_deleted(), 144 | ).display() 145 | 146 | def _diff_index_workspace(self): 147 | for file_path in sorted( 148 | self.repo.status.workspace_deleted | self.repo.status.workspace_modified 149 | ): 150 | index_entry = self.repo.index.entries[file_path] 151 | workspace_entry = ( 152 | IndexEntry.from_file(Path(file_path)) 153 | if file_path in self.repo.status.workspace_modified 154 | else None 155 | ) 156 | 157 | DiffHeader( 158 | a_file=DiffEntry.from_index_entry(index_entry, self.repo.database), 159 | b_file=DiffEntry.from_index_entry(workspace_entry, self.repo.database) 160 | if workspace_entry 161 | else DiffEntry.from_deleted(), 162 | ).display() 163 | -------------------------------------------------------------------------------- /pit/git_object.py: -------------------------------------------------------------------------------- 1 | import hashlib 2 | from dataclasses import dataclass, field 3 | from pathlib import Path 4 | 5 | from pit.values import GitFileMode, AuthorSign 6 | 7 | 8 | @dataclass 9 | class GitObject: 10 | type: str = field(init=False) 11 | oid: str = field(init=False) 12 | 13 | def __bytes__(self): 14 | raise NotImplementedError 15 | 16 | def from_raw(self, raw: bytes) -> "GitObject": 17 | raise NotImplementedError 18 | 19 | 20 | @dataclass() 21 | class Blob(GitObject): 22 | content: bytes 23 | 24 | def __post_init__(self): 25 | self.type = "blob" 26 | self.oid = hashlib.sha1(bytes(self)).hexdigest() 27 | 28 | @classmethod 29 | def from_raw(cls, raw: bytes) -> "Blob": 30 | return Blob(raw.split(b"\x00", 1)[1]) 31 | 32 | def __bytes__(self): 33 | return b"%s %d\x00%s" % ( 34 | self.type.encode(), 35 | len(self.content), 36 | self.content, 37 | ) 38 | 39 | 40 | @dataclass() 41 | class Commit(GitObject): 42 | tree_oid: str 43 | author: AuthorSign 44 | message: str 45 | parent_oid: str = None 46 | 47 | def __post_init__(self): 48 | self.type = "commit" 49 | self.oid = hashlib.sha1(bytes(self)).hexdigest() 50 | 51 | def __bytes__(self): 52 | # b'commit 188\x00tree 2e81171448eb9f2ee3821e3d447aa6b2fe3ddba1\nauthor shidenggui 1635305754 +0800\ncommitter shidenggui 1635305754 +0800\n\nadd a.txt\n' 53 | contents = [b"tree %s" % self.tree_oid.encode()] 54 | if self.parent_oid: 55 | contents.append(b"parent %s" % self.parent_oid.encode()) 56 | contents.append(b"author %s\ncommitter %s" % (self.author, self.author)) 57 | contents.append(b"\n%s\n" % self.message.encode()) 58 | content = b"\n".join(contents) 59 | return b"commit %d\x00%s" % (len(content), content) 60 | 61 | @property 62 | def title(self) -> str: 63 | return self.message.split("\n", 1)[0] 64 | 65 | @classmethod 66 | def from_raw(cls, raw: bytes) -> "Commit": 67 | """ 68 | In [3]: c.split(b'\n') 69 | Out[3]: 70 | [b'commit 231\x00tree 798e9d13e6a2b6fcccf20ffb345222462fd4e891', 71 | b'parent 246c46f09964b12c95aaf73f21a69af0d670e019', 72 | b'author shidenggui 1636459276 +0800', 73 | b'committer shidenggui 1636459276 +0800', 74 | b'', 75 | b'init', 76 | b''] 77 | :param raw: 78 | :return: 79 | """ 80 | lines = raw.split(b"\n") 81 | _, tree_info = lines[0].split(b"\x00") 82 | tree_oid = tree_info[5:].decode() 83 | parent_oid = lines[1][7:].decode() if lines[1].startswith(b"parent") else None 84 | 85 | line_no = 2 if parent_oid else 1 86 | _, author_name, author_email, timestamp, timezone = lines[line_no].split(b" ") 87 | 88 | # remove <> around '' 89 | author_email = author_email[1:-1] 90 | line_no += 3 91 | commit_msg = b"\n".join(lines[line_no:-1]).decode() 92 | return Commit( 93 | tree_oid=tree_oid, 94 | author=AuthorSign( 95 | name=author_name.decode(), 96 | email=author_email.decode(), 97 | timestamp=int(timestamp), 98 | timezone=timezone.decode(), 99 | ), 100 | message=commit_msg, 101 | parent_oid=parent_oid, 102 | ) 103 | 104 | 105 | @dataclass(unsafe_hash=True) 106 | class TreeEntry: 107 | oid: str 108 | path: str 109 | mode: int 110 | 111 | def __bytes__(self): 112 | return b"%s %s\x00%s" % ( 113 | GitFileMode(self.mode), 114 | Path(self.path).name.encode(), 115 | bytes.fromhex(self.oid), 116 | ) 117 | 118 | def is_dir(self) -> bool: 119 | return GitFileMode(self.mode).is_dir() 120 | 121 | @classmethod 122 | def from_raw(cls, raw: bytes) -> "TreeEntry": 123 | mode_len = 5 if raw.startswith(b"4") else 6 124 | mode = GitFileMode.from_raw(raw[:mode_len]).mode 125 | 126 | file_path_end = raw[mode_len + 1 :].find(b"\x00") + mode_len + 1 127 | file_path = raw[mode_len + 1 : file_path_end] 128 | oid = raw[file_path_end + 1 : file_path_end + 21] 129 | return TreeEntry(oid=oid.hex(), path=file_path.decode(), mode=mode) 130 | 131 | 132 | @dataclass() 133 | class Tree(GitObject): 134 | entries: list["TreeEntry"] 135 | 136 | def __post_init__(self): 137 | self.type = "tree" 138 | self.entries = sorted(self.entries, key=lambda x: Path(x.path).name) 139 | 140 | @property 141 | def oid(self): 142 | return hashlib.sha1(bytes(self)).hexdigest() 143 | 144 | @classmethod 145 | def from_raw(cls, raw: bytes) -> "Tree": 146 | head, entries_info = raw.split(b"\x00", 1) 147 | entries = [] 148 | start = len(head) + 1 149 | while start < len(raw): 150 | entry = TreeEntry.from_raw(raw[start:]) 151 | entries.append(entry) 152 | start += len(bytes(entry)) 153 | return Tree(entries=entries) 154 | 155 | def __bytes__(self): 156 | contents = [bytes(entry) for entry in self.entries] 157 | content = b"".join(contents) 158 | return b"tree %d\x00%s" % (len(content), content) 159 | 160 | 161 | if __name__ == "__main__": 162 | print("Test blob") 163 | a_txt = Blob(content=b"hello\n") 164 | print(a_txt) 165 | assert bytes(a_txt) == b"blob 6\x00hello\n" 166 | 167 | print("Test Tree") 168 | a_tree = Tree( 169 | entries=[TreeEntry(path="a.txt", oid=a_txt.oid, mode=int(b"100644", 8))] 170 | ) 171 | print(a_tree) 172 | assert ( 173 | bytes(a_tree) 174 | == b"tree 33\x00100644 a.txt\x00\xce\x016%\x03\x0b\xa8\xdb\xa9\x06\xf7V\x96\x7f\x9e\x9c\xa3\x94FJ" 175 | ), bytes(a_tree) 176 | 177 | print("Test Commit") 178 | a_commit = Commit( 179 | tree_oid=a_tree.oid, 180 | author=AuthorSign( 181 | name="shidenggui", 182 | email="longlyshidenggui@gmail.com", 183 | timestamp=1635305754, 184 | timezone="+0800", 185 | ), 186 | message="add a.txt", 187 | ) 188 | print(a_commit) 189 | expected_commit = b"commit 188\x00tree 2e81171448eb9f2ee3821e3d447aa6b2fe3ddba1\nauthor shidenggui 1635305754 +0800\ncommitter shidenggui 1635305754 +0800\n\nadd a.txt\n" 190 | assert bytes(a_commit) == expected_commit, bytes(a_tree) 191 | assert Commit.from_raw(expected_commit) == a_commit, Commit.from_raw( 192 | expected_commit 193 | ) 194 | 195 | # with parent 196 | expected_commit = b"commit 231\x00tree 798e9d13e6a2b6fcccf20ffb345222462fd4e891\nparent 246c46f09964b12c95aaf73f21a69af0d670e019\nauthor shidenggui 1636459276 +0800\ncommitter shidenggui 1636459276 +0800\n\ninit\n" 197 | assert Commit.from_raw(expected_commit) == Commit( 198 | tree_oid="798e9d13e6a2b6fcccf20ffb345222462fd4e891", 199 | author=AuthorSign( 200 | name="shidenggui", 201 | email="longlyshidenggui@gmail.com", 202 | timestamp=1636459276, 203 | timezone="+0800", 204 | ), 205 | message="init", 206 | parent_oid="246c46f09964b12c95aaf73f21a69af0d670e019", 207 | ), Commit.from_raw(expected_commit) 208 | 209 | print("Parse tree") 210 | tree_raw = b'tree 128\x0040000 a\x00\xe9\x11P\x95\xc46];\xe6l\xbeH\xaf\x1d\x1d3\xb3\x1cWi100644 hello.txt\x00\xe6\x9d\xe2\x9b\xb2\xd1\xd6CK\x8b)\xaewZ\xd8\xc2\xe4\x8cS\x91100644 ttt\x00ax\x07\x98"\x8d\x17\xaf-4\xfc\xe4\xcf\xbd\xf3UV\x83$r100644 txt\x00\xe6\x9d\xe2\x9b\xb2\xd1\xd6CK\x8b)\xaewZ\xd8\xc2\xe4\x8cS\x91' 211 | assert Tree.from_raw(tree_raw) == Tree( 212 | entries=[ 213 | TreeEntry( 214 | oid="00e9115095c4365d3be66cbe48af1d1d33b31c57", path="a", mode=16384 215 | ), 216 | TreeEntry( 217 | oid="00e69de29bb2d1d6434b8b29ae775ad8c2e48c53", 218 | path="hello.txt", 219 | mode=33188, 220 | ), 221 | TreeEntry( 222 | oid="0061780798228d17af2d34fce4cfbdf355568324", path="ttt", mode=33188 223 | ), 224 | TreeEntry( 225 | oid="00e69de29bb2d1d6434b8b29ae775ad8c2e48c53", path="txt", mode=33188 226 | ), 227 | ], 228 | ), Tree.from_raw(tree_raw) 229 | -------------------------------------------------------------------------------- /pit/index.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | import re 3 | from functools import cached_property 4 | from pathlib import Path 5 | import hashlib 6 | 7 | from pit.git_object import Blob, TreeEntry 8 | from pit.values import GitFileMode, GitPath 9 | import os 10 | 11 | 12 | @dataclass 13 | class IndexHeader: 14 | """ 15 | - 12 bytes 的头部 16 | - 4 bytes 固定为 DIRC,为 dircache 的缩写 17 | - 4 bytes 是 version number,支持2,3,4 18 | - 4 bytes 为 int32,index entries 的数量 19 | """ 20 | 21 | prefix: str 22 | version: int 23 | entries: int 24 | 25 | @classmethod 26 | def from_raw(cls, raw: bytes) -> "IndexHeader": 27 | if raw == b"": 28 | return IndexHeader( 29 | prefix="DIRC", 30 | version=int.from_bytes(b"\x00\x00\x00\x02", "big"), 31 | entries=int.from_bytes(b"\x00\x00\x00\x00", "big"), 32 | ) 33 | return IndexHeader( 34 | prefix=raw[:4].decode(), 35 | version=int.from_bytes(raw[4:8], "big"), 36 | entries=int.from_bytes(raw[8:12], "big"), 37 | ) 38 | 39 | def __bytes__(self): 40 | return b"%s%s%s" % ( 41 | self.prefix.encode(), 42 | self.version.to_bytes(4, "big"), 43 | self.entries.to_bytes(4, "big"), 44 | ) 45 | 46 | 47 | @dataclass() 48 | class IndexEntry: 49 | """ 50 | - 10 个 4 bytes 表示下列值 51 | - 32-bit ctime seconds, the last time a file's metadata changed 52 | - 32-bit ctime nansecond fractions 53 | - 32-bit mtime seconds, the last time a file's data changed 54 | - 32-bit mtime nanasecond fractions 55 | - 32-bit dev 56 | - 32-bit ino 57 | - 32-bit mode 58 | - 32-bit uid 59 | - 32-bit gid 60 | - 32-bit file size 61 | - 160-bit (20-byte) SHA-1 62 | - 16-bit(2-byte) other information,其中包含了文件名的长度 63 | - 比如 00 08 66 69 6c 65 2e 74 ..file.txt 64 | - filename 65 | - 最后 padding zero 使得整个 entry 的长度是 8 的倍数 66 | - 所有 entries 的最后 60-bit (20-byte) SHA-1 是整个 index 的 hash,防止数据丢失 67 | """ 68 | 69 | ctime: int 70 | ctime_ns: int 71 | mtime: int 72 | mtime_ns: int 73 | dev: int 74 | ino: int 75 | mode: int 76 | uid: int 77 | gid: int 78 | file_size: int 79 | file_hash: bytes 80 | file_path_length: int 81 | file_path: str 82 | 83 | @cached_property 84 | def oid(self) -> str: 85 | return self.file_hash.hex() 86 | 87 | def to_tree_entry(self) -> TreeEntry: 88 | return TreeEntry(oid=self.file_hash.hex(), path=self.file_path, mode=self.mode) 89 | 90 | def __bytes__(self): 91 | return b"%s%s%s%s%s%s%s%s%s%s%s%s%s%s" % ( 92 | self.ctime.to_bytes(4, "big"), 93 | self.ctime_ns.to_bytes(4, "big"), 94 | self.mtime.to_bytes(4, "big"), 95 | self.mtime_ns.to_bytes(4, "big"), 96 | self.dev.to_bytes(4, "big"), 97 | self.ino.to_bytes(8, "big")[-4:], 98 | int(bytes(GitFileMode(self.mode)), 8).to_bytes(4, "big"), 99 | self.uid.to_bytes(4, "big"), 100 | self.gid.to_bytes(4, "big"), 101 | self.file_size.to_bytes(4, "big"), 102 | self.file_hash, 103 | self.file_path_length.to_bytes(2, "big"), 104 | self.file_path.encode() + b"\x00", 105 | b"\x00" * self.padding_zeros, 106 | ) 107 | 108 | @classmethod 109 | def from_raw(cls, raw: bytes): 110 | file_path_length = int.from_bytes(raw[60:62], "big") 111 | return IndexEntry( 112 | ctime=int.from_bytes(raw[:4], "big"), 113 | ctime_ns=int.from_bytes(raw[4:8], "big"), 114 | mtime=int.from_bytes(raw[8:12], "big"), 115 | mtime_ns=int.from_bytes(raw[12:16], "big"), 116 | dev=int.from_bytes(raw[16:20], "big"), 117 | ino=int.from_bytes(raw[20:24], "big"), 118 | mode=int.from_bytes(raw[24:28], "big"), 119 | uid=int.from_bytes(raw[28:32], "big"), 120 | gid=int.from_bytes(raw[32:36], "big"), 121 | file_size=int.from_bytes(raw[36:40], "big"), 122 | file_hash=raw[40:60], 123 | file_path_length=file_path_length, 124 | file_path=raw[62 : 62 + file_path_length].decode(), 125 | ) 126 | 127 | @classmethod 128 | def from_file(cls, file: Path) -> "IndexEntry": 129 | """ 130 | os.stat_result( 131 | st_mode=33261, 132 | st_ino=12943009244, 133 | st_dev=16777220, 134 | st_nlink=1, 135 | st_uid=501, 136 | st_gid=20, 137 | st_size=6, 138 | st_atime=1635484838, 139 | st_mtime=1635484837, 140 | st_ctime=1635484871) 141 | :param file: 142 | :return: 143 | """ 144 | # TODO: when file_path is longer than 4096 bytes, 145 | # the max value of the file_path_length should be set to 4096 bytes 146 | # when restoring the overflowed file_path, we should use incremental scanning 147 | blob = Blob(file.read_bytes()) 148 | file_hash = bytes.fromhex(blob.oid) 149 | file_stat = file.stat() 150 | return IndexEntry( 151 | ctime=int(file_stat.st_ctime), 152 | ctime_ns=int(file_stat.st_ctime_ns - int(file_stat.st_ctime) * 10 ** 9), 153 | mtime=int(file_stat.st_mtime), 154 | mtime_ns=int(file_stat.st_mtime_ns - int(file_stat.st_mtime) * 10 ** 9), 155 | dev=file_stat.st_dev, 156 | ino=int.from_bytes(file_stat.st_ino.to_bytes(8, "big")[-4:], "big"), 157 | mode=file_stat.st_mode, 158 | uid=file_stat.st_uid, 159 | gid=file_stat.st_gid, 160 | file_size=file_stat.st_size, 161 | file_hash=file_hash, 162 | file_path_length=len(str(file)), 163 | file_path=str(file), 164 | ) 165 | 166 | @property 167 | def padding_zeros(self): 168 | # + 1 because file_path ends with '\x00' 169 | entry_length = 62 + self.file_path_length + 1 170 | return 8 - entry_length % 8 if entry_length % 8 else 0 171 | 172 | @property 173 | def length(self): 174 | return len(bytes(self)) 175 | 176 | 177 | class Index: 178 | entries: dict[str, IndexEntry] 179 | header: IndexHeader 180 | 181 | def __init__(self, root_dir: Path): 182 | self._root_dir = root_dir 183 | self._git_dir = self._root_dir / ".git" 184 | self.index_path = self._git_dir / "index" 185 | self.header, self.entries = self._parse() 186 | 187 | def __repr__(self): 188 | return f"" 189 | 190 | def __bytes__(self): 191 | self.header.entries = len(self.entries) 192 | data = b"%s%s" % ( 193 | self.header, 194 | b"".join( 195 | [ 196 | bytes(e) 197 | for e in sorted(self.entries.values(), key=lambda e: e.file_path) 198 | ] 199 | ), 200 | ) 201 | 202 | return b"%s%s" % ( 203 | data, 204 | hashlib.sha1(data).digest(), 205 | ) 206 | 207 | def has_tracked(self, path: Path) -> bool: 208 | path = str(Path(path).resolve().relative_to(self._root_dir.resolve())) 209 | if path in self.entries: 210 | return True 211 | if path in self.parents: 212 | return True 213 | return False 214 | 215 | def has_modified(self, path: Path) -> bool: 216 | return IndexEntry.from_file(path) != self.entries.get( 217 | str(GitPath(path, root_dir=self._root_dir)) 218 | ) 219 | 220 | @property 221 | def parents(self): 222 | parents = set() 223 | for path in self.entries: 224 | for p in Path(path).parents: 225 | parents.add(str(p)) 226 | return parents 227 | 228 | def add_file(self, file_path: Path | str): 229 | # if sub path try to format the sub path to the path relative to the root dir 230 | file_path = Path(file_path).resolve().relative_to(self._root_dir.resolve()) 231 | for parent_dir in file_path.parents: 232 | self.entries.pop(str(parent_dir), None) 233 | 234 | new_entry = IndexEntry.from_file(file_path) 235 | self.entries[new_entry.file_path] = new_entry 236 | self.header.entries = len(self.entries) 237 | 238 | def remove_file(self, file_path: Path | str): 239 | # if sub path try to format the sub path to the path relative to the root dir 240 | git_path = GitPath(file_path, self._root_dir) 241 | self.entries.pop(str(git_path), None) 242 | self.header.entries = len(self.entries) 243 | 244 | def clean(self): 245 | """Clean deleted files""" 246 | for entry_file_path in list(self.entries): 247 | if not os.path.exists(entry_file_path): 248 | self.entries.pop(entry_file_path) 249 | self.header.entries = len(self.entries) 250 | 251 | def _parse(self): 252 | raw = self.index_path.read_bytes() if self.index_path.exists() else b"" 253 | header = IndexHeader.from_raw(raw) 254 | entries = {} 255 | scanned = 12 256 | for _ in range(header.entries): 257 | entry = IndexEntry.from_raw(raw[scanned:]) 258 | entries[entry.file_path] = entry 259 | scanned += entry.length 260 | 261 | return header, entries 262 | 263 | 264 | if __name__ == "__main__": 265 | index = Index("/Users/apple/works/projects/learn/learn_pit/ex2_nested") 266 | print(index) 267 | hex_index = (b"%s" % index).hex() 268 | print(re.findall("\w{4}", hex_index)) 269 | --------------------------------------------------------------------------------