git_refs: read refs via git plumbing for files/reftable

Replace direct `packed-refs` file parsing with `git for-each-ref`
plumbing to support both `files` and `reftable` backends.

Bug: 476209856
Change-Id: I2ad8ff8f3382426600f15370c997f9bc17165485
Reviewed-on: https://gerrit-review.googlesource.com/c/git-repo/+/550401
Commit-Queue: Gavin Mak <gavinmak@google.com>
Reviewed-by: Mike Frysinger <vapier@google.com>
Tested-by: Gavin Mak <gavinmak@google.com>
This commit is contained in:
Gavin Mak
2026-02-06 14:55:34 -08:00
committed by LUCI
parent 551087cd98
commit 67881c0c3b
4 changed files with 237 additions and 62 deletions
+1 -2
View File
@@ -22,7 +22,6 @@ from typing import Any, Optional
from error import GitError
from error import RepoExitError
from git_refs import HEAD
from git_trace2_event_log_base import BaseEventLog
import platform_utils
from repo_logging import RepoLogger
@@ -83,7 +82,7 @@ def RepoSourceVersion():
proj = os.path.dirname(os.path.abspath(__file__))
env[GIT_DIR] = os.path.join(proj, ".git")
result = subprocess.run(
[GIT, "describe", HEAD],
[GIT, "describe", "HEAD"],
stdout=subprocess.PIPE,
stderr=subprocess.DEVNULL,
encoding="utf-8",
+88 -57
View File
@@ -14,6 +14,7 @@
import os
from git_command import GitCommand
import platform_utils
from repo_trace import Trace
@@ -86,9 +87,8 @@ class GitRefs:
self._symref = {}
self._mtime = {}
self._ReadPackedRefs()
self._ReadLoose("refs/")
self._ReadLoose1(os.path.join(self._gitdir, HEAD), HEAD)
self._ReadRefs()
self._ReadSymbolicRef(HEAD)
scan = self._symref
attempts = 0
@@ -102,64 +102,95 @@ class GitRefs:
scan = scan_next
attempts += 1
def _ReadPackedRefs(self):
path = os.path.join(self._gitdir, "packed-refs")
self._TrackMtime(HEAD)
self._TrackMtime("config")
self._TrackMtime("packed-refs")
self._TrackTreeMtimes("refs")
self._TrackTreeMtimes("reftable")
@staticmethod
def _IsNullRef(ref_id: str) -> bool:
"""Check if a ref_id is a null object ID."""
return ref_id and all(ch == "0" for ch in ref_id)
def _ReadRefs(self) -> None:
"""Read all references using git for-each-ref."""
p = GitCommand(
None,
["for-each-ref", "--format=%(objectname)%00%(refname)%00%(symref)"],
capture_stdout=True,
capture_stderr=True,
bare=True,
gitdir=self._gitdir,
)
if p.Wait() != 0:
return
for line in p.stdout.splitlines():
ref_id, name, symref = line.split("\0")
if symref:
self._symref[name] = symref
elif ref_id and not self._IsNullRef(ref_id):
self._phyref[name] = ref_id
def _ReadSymbolicRef(self, name: str) -> None:
"""Read a symbolic reference."""
p = GitCommand(
None,
["symbolic-ref", "-q", name],
capture_stdout=True,
capture_stderr=True,
bare=True,
gitdir=self._gitdir,
)
if p.Wait() == 0:
ref = p.stdout.strip()
if ref:
self._symref[name] = ref
return
p = GitCommand(
None,
["rev-parse", "--verify", "-q", name],
capture_stdout=True,
capture_stderr=True,
bare=True,
gitdir=self._gitdir,
)
if p.Wait() == 0:
ref_id = p.stdout.strip()
if ref_id:
self._phyref[name] = ref_id
def _TrackMtime(self, name: str) -> None:
"""Track the modification time of a specific gitdir path."""
path = os.path.join(self._gitdir, name)
try:
fd = open(path)
mtime = os.path.getmtime(path)
self._mtime[name] = os.path.getmtime(path)
except OSError:
return
def _TrackTreeMtimes(self, root: str) -> None:
"""Recursively track modification times for a directory tree."""
root_path = os.path.join(self._gitdir, root)
try:
for line in fd:
line = str(line)
if line[0] == "#":
continue
if line[0] == "^":
continue
line = line[:-1]
p = line.split(" ")
ref_id = p[0]
name = p[1]
self._phyref[name] = ref_id
finally:
fd.close()
self._mtime["packed-refs"] = mtime
def _ReadLoose(self, prefix):
base = os.path.join(self._gitdir, prefix)
for name in platform_utils.listdir(base):
p = os.path.join(base, name)
# We don't implement the full ref validation algorithm, just the
# simple rules that would show up in local filesystems.
# https://git-scm.com/docs/git-check-ref-format
if name.startswith(".") or name.endswith(".lock"):
pass
elif platform_utils.isdir(p):
self._mtime[prefix] = os.path.getmtime(base)
self._ReadLoose(prefix + name + "/")
else:
self._ReadLoose1(p, prefix + name)
def _ReadLoose1(self, path, name):
try:
with open(path) as fd:
mtime = os.path.getmtime(path)
ref_id = fd.readline()
except (OSError, UnicodeError):
if not platform_utils.isdir(root_path):
return
except OSError:
return
try:
ref_id = ref_id.decode()
except AttributeError:
pass
if not ref_id:
return
ref_id = ref_id[:-1]
to_scan = [root]
while to_scan:
name = to_scan.pop()
self._TrackMtime(name)
path = os.path.join(self._gitdir, name)
if not platform_utils.isdir(path):
continue
if ref_id.startswith("ref: "):
self._symref[name] = ref_id[5:]
else:
self._phyref[name] = ref_id
self._mtime[name] = mtime
for child in platform_utils.listdir(path):
child_name = os.path.join(name, child)
child_path = os.path.join(self._gitdir, child_name)
if platform_utils.isdir(child_path):
to_scan.append(child_name)
else:
self._TrackMtime(child_name)
+99
View File
@@ -0,0 +1,99 @@
# Copyright (C) 2026 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Unittests for the git_refs.py module."""
import os
from pathlib import Path
import subprocess
import pytest
import utils_for_test
import git_refs
def _run(repo, *args):
return subprocess.run(
["git", "-C", repo, *args],
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
encoding="utf-8",
check=True,
).stdout.strip()
def _init_repo(tmp_path, reftable=False):
repo = os.path.join(tmp_path, "repo")
ref_format = "reftable" if reftable else "files"
utils_for_test.init_git_tree(repo, ref_format=ref_format)
Path(os.path.join(repo, "a")).write_text("1")
_run(repo, "add", "a")
_run(repo, "commit", "-q", "-m", "init")
return repo
@pytest.mark.parametrize("reftable", [False, True])
def test_reads_refs(tmp_path, reftable):
if reftable and not utils_for_test.supports_reftable():
pytest.skip("reftable not supported")
repo = _init_repo(tmp_path, reftable=reftable)
gitdir = os.path.join(repo, ".git")
refs = git_refs.GitRefs(gitdir)
branch = _run(repo, "symbolic-ref", "--short", "HEAD")
head = _run(repo, "rev-parse", "HEAD")
assert refs.symref("HEAD") == f"refs/heads/{branch}"
assert refs.get("HEAD") == head
assert refs.get(f"refs/heads/{branch}") == head
@pytest.mark.parametrize("reftable", [False, True])
def test_updates_when_refs_change(tmp_path, reftable):
if reftable and not utils_for_test.supports_reftable():
pytest.skip("reftable not supported")
repo = _init_repo(tmp_path, reftable=reftable)
gitdir = os.path.join(repo, ".git")
refs = git_refs.GitRefs(gitdir)
head = _run(repo, "rev-parse", "HEAD")
assert refs.get("refs/heads/topic") == ""
_run(repo, "branch", "topic")
assert refs.get("refs/heads/topic") == head
_run(repo, "branch", "-D", "topic")
assert refs.get("refs/heads/topic") == ""
@pytest.mark.skipif(
not utils_for_test.supports_refs_migrate(),
reason="git refs migrate reftable support is required for this test",
)
def test_updates_when_storage_backend_toggles(tmp_path):
repo = _init_repo(tmp_path, reftable=False)
gitdir = os.path.join(repo, ".git")
refs = git_refs.GitRefs(gitdir)
head = _run(repo, "rev-parse", "HEAD")
assert refs.get("refs/heads/reftable-branch") == ""
_run(repo, "refs", "migrate", "--ref-format=reftable")
_run(repo, "branch", "reftable-branch")
assert refs.get("refs/heads/reftable-branch") == head
assert refs.get("refs/heads/files-branch") == ""
_run(repo, "refs", "migrate", "--ref-format=files")
_run(repo, "branch", "files-branch")
assert refs.get("refs/heads/files-branch") == head
+49 -3
View File
@@ -18,20 +18,28 @@ If you want to write a per-test fixture, see conftest.py instead.
"""
import contextlib
import functools
from pathlib import Path
import subprocess
import tempfile
from typing import Union
from typing import Optional, Union
import git_command
def init_git_tree(path: Union[str, Path]) -> None:
def init_git_tree(
path: Union[str, Path],
ref_format: Optional[str] = None,
) -> None:
"""Initialize `path` as a new git repo."""
with contextlib.ExitStack() as stack:
# Tests need to assume, that main is default branch at init,
# which is not supported in config until 2.28.
cmd = ["git", "init"]
cmd = ["git"]
if ref_format:
cmd += ["-c", f"init.defaultRefFormat={ref_format}"]
cmd += ["init"]
if git_command.git_require((2, 28, 0)):
cmd += ["--initial-branch=main"]
else:
@@ -51,3 +59,41 @@ def TempGitTree():
with tempfile.TemporaryDirectory(prefix="repo-tests") as tempdir:
init_git_tree(tempdir)
yield tempdir
@functools.lru_cache(maxsize=None)
def supports_reftable() -> bool:
"""Check if git supports reftable."""
with tempfile.TemporaryDirectory(prefix="repo-tests") as tempdir:
proc = subprocess.run(
["git", "-c", "init.defaultRefFormat=reftable", "init"],
cwd=tempdir,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
check=False,
)
return proc.returncode == 0
@functools.lru_cache(maxsize=None)
def supports_refs_migrate() -> bool:
"""Check if git supports refs migrate."""
with tempfile.TemporaryDirectory(prefix="repo-tests") as tempdir:
subprocess.check_call(
["git", "-c", "init.defaultRefFormat=files", "init"],
cwd=tempdir,
)
proc = subprocess.run(
[
"git",
"refs",
"migrate",
"--ref-format=reftable",
"--dry-run",
],
cwd=tempdir,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
check=False,
)
return proc.returncode == 0