From 67881c0c3b27d850c57070919f4476b370de7768 Mon Sep 17 00:00:00 2001 From: Gavin Mak Date: Fri, 6 Feb 2026 14:55:34 -0800 Subject: [PATCH] git_refs: read refs via git plumbing for files/reftable Replace direct `packed-refs` file parsing with `git for-each-ref` plumbing to support both `files` and `reftable` backends. Bug: 476209856 Change-Id: I2ad8ff8f3382426600f15370c997f9bc17165485 Reviewed-on: https://gerrit-review.googlesource.com/c/git-repo/+/550401 Commit-Queue: Gavin Mak Reviewed-by: Mike Frysinger Tested-by: Gavin Mak --- git_command.py | 3 +- git_refs.py | 145 ++++++++++++++++++++++++---------------- tests/test_git_refs.py | 99 +++++++++++++++++++++++++++ tests/utils_for_test.py | 52 +++++++++++++- 4 files changed, 237 insertions(+), 62 deletions(-) create mode 100644 tests/test_git_refs.py diff --git a/git_command.py b/git_command.py index 04844079f..89e8d2f44 100644 --- a/git_command.py +++ b/git_command.py @@ -22,7 +22,6 @@ from typing import Any, Optional from error import GitError from error import RepoExitError -from git_refs import HEAD from git_trace2_event_log_base import BaseEventLog import platform_utils from repo_logging import RepoLogger @@ -83,7 +82,7 @@ def RepoSourceVersion(): proj = os.path.dirname(os.path.abspath(__file__)) env[GIT_DIR] = os.path.join(proj, ".git") result = subprocess.run( - [GIT, "describe", HEAD], + [GIT, "describe", "HEAD"], stdout=subprocess.PIPE, stderr=subprocess.DEVNULL, encoding="utf-8", diff --git a/git_refs.py b/git_refs.py index 237c15ae2..e3da103a0 100644 --- a/git_refs.py +++ b/git_refs.py @@ -14,6 +14,7 @@ import os +from git_command import GitCommand import platform_utils from repo_trace import Trace @@ -86,9 +87,8 @@ class GitRefs: self._symref = {} self._mtime = {} - self._ReadPackedRefs() - self._ReadLoose("refs/") - self._ReadLoose1(os.path.join(self._gitdir, HEAD), HEAD) + self._ReadRefs() + self._ReadSymbolicRef(HEAD) scan = self._symref attempts = 0 @@ -102,64 +102,95 @@ class GitRefs: scan = scan_next attempts += 1 - def _ReadPackedRefs(self): - path = os.path.join(self._gitdir, "packed-refs") + self._TrackMtime(HEAD) + self._TrackMtime("config") + self._TrackMtime("packed-refs") + self._TrackTreeMtimes("refs") + self._TrackTreeMtimes("reftable") + + @staticmethod + def _IsNullRef(ref_id: str) -> bool: + """Check if a ref_id is a null object ID.""" + return ref_id and all(ch == "0" for ch in ref_id) + + def _ReadRefs(self) -> None: + """Read all references using git for-each-ref.""" + p = GitCommand( + None, + ["for-each-ref", "--format=%(objectname)%00%(refname)%00%(symref)"], + capture_stdout=True, + capture_stderr=True, + bare=True, + gitdir=self._gitdir, + ) + if p.Wait() != 0: + return + + for line in p.stdout.splitlines(): + ref_id, name, symref = line.split("\0") + if symref: + self._symref[name] = symref + elif ref_id and not self._IsNullRef(ref_id): + self._phyref[name] = ref_id + + def _ReadSymbolicRef(self, name: str) -> None: + """Read a symbolic reference.""" + p = GitCommand( + None, + ["symbolic-ref", "-q", name], + capture_stdout=True, + capture_stderr=True, + bare=True, + gitdir=self._gitdir, + ) + if p.Wait() == 0: + ref = p.stdout.strip() + if ref: + self._symref[name] = ref + return + + p = GitCommand( + None, + ["rev-parse", "--verify", "-q", name], + capture_stdout=True, + capture_stderr=True, + bare=True, + gitdir=self._gitdir, + ) + if p.Wait() == 0: + ref_id = p.stdout.strip() + if ref_id: + self._phyref[name] = ref_id + + def _TrackMtime(self, name: str) -> None: + """Track the modification time of a specific gitdir path.""" + path = os.path.join(self._gitdir, name) try: - fd = open(path) - mtime = os.path.getmtime(path) + self._mtime[name] = os.path.getmtime(path) except OSError: return + + def _TrackTreeMtimes(self, root: str) -> None: + """Recursively track modification times for a directory tree.""" + root_path = os.path.join(self._gitdir, root) try: - for line in fd: - line = str(line) - if line[0] == "#": - continue - if line[0] == "^": - continue - - line = line[:-1] - p = line.split(" ") - ref_id = p[0] - name = p[1] - - self._phyref[name] = ref_id - finally: - fd.close() - self._mtime["packed-refs"] = mtime - - def _ReadLoose(self, prefix): - base = os.path.join(self._gitdir, prefix) - for name in platform_utils.listdir(base): - p = os.path.join(base, name) - # We don't implement the full ref validation algorithm, just the - # simple rules that would show up in local filesystems. - # https://git-scm.com/docs/git-check-ref-format - if name.startswith(".") or name.endswith(".lock"): - pass - elif platform_utils.isdir(p): - self._mtime[prefix] = os.path.getmtime(base) - self._ReadLoose(prefix + name + "/") - else: - self._ReadLoose1(p, prefix + name) - - def _ReadLoose1(self, path, name): - try: - with open(path) as fd: - mtime = os.path.getmtime(path) - ref_id = fd.readline() - except (OSError, UnicodeError): + if not platform_utils.isdir(root_path): + return + except OSError: return - try: - ref_id = ref_id.decode() - except AttributeError: - pass - if not ref_id: - return - ref_id = ref_id[:-1] + to_scan = [root] + while to_scan: + name = to_scan.pop() + self._TrackMtime(name) + path = os.path.join(self._gitdir, name) + if not platform_utils.isdir(path): + continue - if ref_id.startswith("ref: "): - self._symref[name] = ref_id[5:] - else: - self._phyref[name] = ref_id - self._mtime[name] = mtime + for child in platform_utils.listdir(path): + child_name = os.path.join(name, child) + child_path = os.path.join(self._gitdir, child_name) + if platform_utils.isdir(child_path): + to_scan.append(child_name) + else: + self._TrackMtime(child_name) diff --git a/tests/test_git_refs.py b/tests/test_git_refs.py new file mode 100644 index 000000000..70c0e08ad --- /dev/null +++ b/tests/test_git_refs.py @@ -0,0 +1,99 @@ +# Copyright (C) 2026 The Android Open Source Project +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Unittests for the git_refs.py module.""" + +import os +from pathlib import Path +import subprocess + +import pytest +import utils_for_test + +import git_refs + + +def _run(repo, *args): + return subprocess.run( + ["git", "-C", repo, *args], + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + encoding="utf-8", + check=True, + ).stdout.strip() + + +def _init_repo(tmp_path, reftable=False): + repo = os.path.join(tmp_path, "repo") + ref_format = "reftable" if reftable else "files" + utils_for_test.init_git_tree(repo, ref_format=ref_format) + + Path(os.path.join(repo, "a")).write_text("1") + _run(repo, "add", "a") + _run(repo, "commit", "-q", "-m", "init") + return repo + + +@pytest.mark.parametrize("reftable", [False, True]) +def test_reads_refs(tmp_path, reftable): + if reftable and not utils_for_test.supports_reftable(): + pytest.skip("reftable not supported") + + repo = _init_repo(tmp_path, reftable=reftable) + gitdir = os.path.join(repo, ".git") + refs = git_refs.GitRefs(gitdir) + + branch = _run(repo, "symbolic-ref", "--short", "HEAD") + head = _run(repo, "rev-parse", "HEAD") + assert refs.symref("HEAD") == f"refs/heads/{branch}" + assert refs.get("HEAD") == head + assert refs.get(f"refs/heads/{branch}") == head + + +@pytest.mark.parametrize("reftable", [False, True]) +def test_updates_when_refs_change(tmp_path, reftable): + if reftable and not utils_for_test.supports_reftable(): + pytest.skip("reftable not supported") + + repo = _init_repo(tmp_path, reftable=reftable) + gitdir = os.path.join(repo, ".git") + refs = git_refs.GitRefs(gitdir) + + head = _run(repo, "rev-parse", "HEAD") + assert refs.get("refs/heads/topic") == "" + _run(repo, "branch", "topic") + assert refs.get("refs/heads/topic") == head + _run(repo, "branch", "-D", "topic") + assert refs.get("refs/heads/topic") == "" + + +@pytest.mark.skipif( + not utils_for_test.supports_refs_migrate(), + reason="git refs migrate reftable support is required for this test", +) +def test_updates_when_storage_backend_toggles(tmp_path): + repo = _init_repo(tmp_path, reftable=False) + gitdir = os.path.join(repo, ".git") + refs = git_refs.GitRefs(gitdir) + + head = _run(repo, "rev-parse", "HEAD") + assert refs.get("refs/heads/reftable-branch") == "" + _run(repo, "refs", "migrate", "--ref-format=reftable") + _run(repo, "branch", "reftable-branch") + assert refs.get("refs/heads/reftable-branch") == head + + assert refs.get("refs/heads/files-branch") == "" + _run(repo, "refs", "migrate", "--ref-format=files") + _run(repo, "branch", "files-branch") + assert refs.get("refs/heads/files-branch") == head diff --git a/tests/utils_for_test.py b/tests/utils_for_test.py index e162e8666..f48613f34 100644 --- a/tests/utils_for_test.py +++ b/tests/utils_for_test.py @@ -18,20 +18,28 @@ If you want to write a per-test fixture, see conftest.py instead. """ import contextlib +import functools from pathlib import Path import subprocess import tempfile -from typing import Union +from typing import Optional, Union import git_command -def init_git_tree(path: Union[str, Path]) -> None: +def init_git_tree( + path: Union[str, Path], + ref_format: Optional[str] = None, +) -> None: """Initialize `path` as a new git repo.""" with contextlib.ExitStack() as stack: # Tests need to assume, that main is default branch at init, # which is not supported in config until 2.28. - cmd = ["git", "init"] + cmd = ["git"] + if ref_format: + cmd += ["-c", f"init.defaultRefFormat={ref_format}"] + cmd += ["init"] + if git_command.git_require((2, 28, 0)): cmd += ["--initial-branch=main"] else: @@ -51,3 +59,41 @@ def TempGitTree(): with tempfile.TemporaryDirectory(prefix="repo-tests") as tempdir: init_git_tree(tempdir) yield tempdir + + +@functools.lru_cache(maxsize=None) +def supports_reftable() -> bool: + """Check if git supports reftable.""" + with tempfile.TemporaryDirectory(prefix="repo-tests") as tempdir: + proc = subprocess.run( + ["git", "-c", "init.defaultRefFormat=reftable", "init"], + cwd=tempdir, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + check=False, + ) + return proc.returncode == 0 + + +@functools.lru_cache(maxsize=None) +def supports_refs_migrate() -> bool: + """Check if git supports refs migrate.""" + with tempfile.TemporaryDirectory(prefix="repo-tests") as tempdir: + subprocess.check_call( + ["git", "-c", "init.defaultRefFormat=files", "init"], + cwd=tempdir, + ) + proc = subprocess.run( + [ + "git", + "refs", + "migrate", + "--ref-format=reftable", + "--dry-run", + ], + cwd=tempdir, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + check=False, + ) + return proc.returncode == 0