mirror of
https://gerrit.googlesource.com/git-repo
synced 2026-05-31 15:09:48 +00:00
project: implement stateless sync pruning logic
Implement in-situ shallow re-fetching and garbage collection logic. Enables repositories with sync-strategy="stateless" to reclaim disk space by running reflog expire and git gc --prune=now if the working tree is clean and has no local commits. Bug: 498730431 Change-Id: I940bdc9b74da29d3f7b13566667dcddea769ebd3 Reviewed-on: https://gerrit-review.googlesource.com/c/git-repo/+/568463 Reviewed-by: Mike Frysinger <vapier@google.com> Tested-by: Gavin Mak <gavinmak@google.com> Commit-Queue: Gavin Mak <gavinmak@google.com>
This commit is contained in:
+101
@@ -629,6 +629,7 @@ class Project:
|
|||||||
self.linkfiles = {}
|
self.linkfiles = {}
|
||||||
self.annotations = []
|
self.annotations = []
|
||||||
self.dest_branch = dest_branch
|
self.dest_branch = dest_branch
|
||||||
|
self.stateless_prune_needed = False
|
||||||
|
|
||||||
# This will be filled in if a project is later identified to be the
|
# This will be filled in if a project is later identified to be the
|
||||||
# project containing repo hooks.
|
# project containing repo hooks.
|
||||||
@@ -758,6 +759,18 @@ class Project:
|
|||||||
return True
|
return True
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
def HasStash(self) -> bool:
    """Reports whether the repository currently has a stash.

    Runs `git rev-parse --verify refs/stash` against the bare git dir and
    treats a zero exit status as "a stash exists". Output is captured and
    the command is not logged as an error, so a missing ref stays quiet.

    Returns:
        True if the command exits with status 0, False otherwise.
    """
    verify_stash_ref = ["rev-parse", "--verify", "refs/stash"]
    probe = GitCommand(
        self,
        verify_stash_ref,
        bare=True,
        capture_stdout=True,
        capture_stderr=True,
        log_as_error=False,
    )
    exit_status = probe.Wait()
    return exit_status == 0
|
||||||
|
|
||||||
_userident_name = None
|
_userident_name = None
|
||||||
_userident_email = None
|
_userident_email = None
|
||||||
|
|
||||||
@@ -1241,6 +1254,67 @@ class Project:
|
|||||||
logger.error("error: Cannot extract archive %s: %s", tarpath, e)
|
logger.error("error: Cannot extract archive %s: %s", tarpath, e)
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
def _ShouldStatelessPrune(
    self, use_superproject: Optional[bool] = None
) -> bool:
    """Determines if a stateless prune should be performed.

    Stateless pruning reclaims space by expiring reflogs and running
    garbage collection. It is only requested when the checkout is behind
    its target revision, does not share its object directory, and holds no
    local-only state (unpushed commits, modified or untracked files, or a
    stash).

    Args:
        use_superproject: Forwarded unchanged to
            _CheckForImmutableRevision.

    Returns:
        True only if every check passes; False as soon as one of them shows
        that pruning is unnecessary or unsafe.
    """
    if not self.Exists:
        return False

    if self._CheckForImmutableRevision(use_superproject=use_superproject):
        return False

    # Query the target hash from remote to see if we are up-to-date.
    if IsId(self.revisionExpr):
        target_hash = self.revisionExpr
    else:
        output = self._LsRemote(self.upstream or self.revisionExpr)
        # The hash is the first whitespace-separated field of the output.
        # Splitting the whole output (rather than indexing into its first
        # line) means blank or whitespace-only output yields "no target"
        # instead of raising IndexError.
        fields = output.split() if output else []
        target_hash = fields[0] if fields else None

    if not target_hash:
        return False

    try:
        local_head = self.bare_git.rev_parse("HEAD")
    except GitError:
        # An unresolvable HEAD is simply treated as "not at the target".
        local_head = None

    if target_hash == local_head:
        return False

    # Skip if sharing objects with other projects.
    if self.UseAlternates or self.use_git_worktrees:
        return False
    if any(
        other != self and other.objdir == self.objdir
        for other in self.manifest.GetProjectsWithName(self.name)
    ):
        return False

    # Skip if HEAD contains any unpushed local commits, i.e. commits
    # reachable from HEAD but not from any remote-tracking ref or tag.
    # Failing to obtain or parse the count is treated as "unsafe".
    try:
        local_commits = self.bare_git.rev_list(
            "--count", "HEAD", "--not", "--remotes", "--tags"
        )
        if int(local_commits[0]) > 0:
            return False
    except (GitError, IndexError, ValueError):
        return False

    # Finally, refuse when the work tree is dirty (untracked files
    # included) or a stash exists.
    return not (self.IsDirty(consider_untracked=True) or self.HasStash())
|
||||||
|
|
||||||
def Sync_NetworkHalf(
|
def Sync_NetworkHalf(
|
||||||
self,
|
self,
|
||||||
quiet=False,
|
quiet=False,
|
||||||
@@ -1318,6 +1392,11 @@ class Project:
|
|||||||
clone_bundle = True
|
clone_bundle = True
|
||||||
clone_filter = None
|
clone_filter = None
|
||||||
|
|
||||||
|
if self.sync_strategy == "stateless" and self._ShouldStatelessPrune(
|
||||||
|
use_superproject
|
||||||
|
):
|
||||||
|
self.stateless_prune_needed = True
|
||||||
|
|
||||||
if is_new is None:
|
if is_new is None:
|
||||||
is_new = not self.Exists
|
is_new = not self.Exists
|
||||||
if is_new:
|
if is_new:
|
||||||
@@ -1602,6 +1681,23 @@ class Project:
|
|||||||
def _dosubmodules():
|
def _dosubmodules():
|
||||||
self._SyncSubmodules(quiet=True)
|
self._SyncSubmodules(quiet=True)
|
||||||
|
|
||||||
|
def _doprune() -> None:
    """Expire reflogs and run prune-now GC for stateless sync.

    Both steps are best-effort: a failing command is reported with a
    warning and never raises, and the gc is attempted even if the reflog
    expiration failed.
    """
    expire = GitCommand(
        self,
        ["reflog", "expire", "--expire=all", "--all"],
        bare=True,
    )
    if expire.Wait() != 0:
        # Entries left in the reflog keep their objects reachable, so the
        # gc below would reclaim little; make that visible rather than
        # silently ignoring the exit status.
        logger.warning(
            "warn: %s: stateless reflog expire failed", self.name
        )

    p = GitCommand(
        self,
        ["gc", "--prune=now"],
        bare=True,
        capture_stdout=True,
        capture_stderr=True,
    )
    if p.Wait() != 0:
        logger.warning("warn: %s: stateless gc failed", self.name)
|
||||||
|
|
||||||
head = self.work_git.GetHead()
|
head = self.work_git.GetHead()
|
||||||
if head.startswith(R_HEADS):
|
if head.startswith(R_HEADS):
|
||||||
branch = head[len(R_HEADS) :]
|
branch = head[len(R_HEADS) :]
|
||||||
@@ -1647,6 +1743,8 @@ class Project:
|
|||||||
fail(e)
|
fail(e)
|
||||||
return
|
return
|
||||||
self._CopyAndLinkFiles()
|
self._CopyAndLinkFiles()
|
||||||
|
if self.stateless_prune_needed:
|
||||||
|
syncbuf.later2(self, _doprune, not verbose)
|
||||||
return
|
return
|
||||||
|
|
||||||
if head == revid:
|
if head == revid:
|
||||||
@@ -1793,6 +1891,9 @@ class Project:
|
|||||||
if submodules:
|
if submodules:
|
||||||
syncbuf.later1(self, _dosubmodules, not verbose)
|
syncbuf.later1(self, _dosubmodules, not verbose)
|
||||||
|
|
||||||
|
if self.stateless_prune_needed:
|
||||||
|
syncbuf.later2(self, _doprune, not verbose)
|
||||||
|
|
||||||
def AddCopyFile(self, src, dest, topdir):
|
def AddCopyFile(self, src, dest, topdir):
|
||||||
"""Mark |src| for copying to |dest| (relative to |topdir|).
|
"""Mark |src| for copying to |dest| (relative to |topdir|).
|
||||||
|
|
||||||
|
|||||||
@@ -21,6 +21,7 @@ import subprocess
|
|||||||
import tempfile
|
import tempfile
|
||||||
from typing import Optional
|
from typing import Optional
|
||||||
import unittest
|
import unittest
|
||||||
|
from unittest import mock
|
||||||
|
|
||||||
import utils_for_test
|
import utils_for_test
|
||||||
|
|
||||||
@@ -565,3 +566,120 @@ class ManifestPropertiesFetchedCorrectly(unittest.TestCase):
|
|||||||
|
|
||||||
fakeproj.config.SetString("manifest.platform", "auto")
|
fakeproj.config.SetString("manifest.platform", "auto")
|
||||||
self.assertEqual(fakeproj.manifest_platform, "auto")
|
self.assertEqual(fakeproj.manifest_platform, "auto")
|
||||||
|
|
||||||
|
|
||||||
|
class StatelessSyncTests(unittest.TestCase):
    """Checks the prune bookkeeping of the "stateless" sync strategy."""

    def _get_project(self, tempdir):
        """Builds a stateless Project rooted at |tempdir| with git stubbed.

        The stubs describe a project that is behind its remote (remote
        reports 1234abcd, local HEAD is 5678abcd), has zero local-only
        commits and a clean work tree.
        """
        manifest = mock.MagicMock()
        manifest.is_multimanifest = False
        settings = manifest.manifestProject
        settings.depth = None
        settings.dissociate = False
        settings.clone_filter = None
        settings.config.GetBoolean.return_value = False

        remote = mock.MagicMock()
        remote.name = "origin"
        remote.url = "http://"

        dot_git = os.path.join(tempdir, ".git")
        proj = project.Project(
            manifest=manifest,
            name="test-project",
            remote=remote,
            gitdir=dot_git,
            objdir=dot_git,
            worktree=tempdir,
            relpath="test-project",
            revisionExpr="1234abcd",
            revisionId=None,
            sync_strategy="stateless",
        )

        # Remote side: mutable revision that resolves to 1234abcd.
        proj._CheckForImmutableRevision = mock.MagicMock(return_value=False)
        proj._LsRemote = mock.MagicMock(
            return_value="1234abcd\trefs/heads/main\n"
        )

        # Local side: different HEAD, no local commits, clean tree.
        fake_bare_git = mock.MagicMock()
        fake_bare_git.rev_parse.return_value = "5678abcd"
        fake_bare_git.rev_list.return_value = ["0"]
        proj.bare_git = fake_bare_git
        proj.IsDirty = mock.MagicMock(return_value=False)
        proj.GetBranches = mock.MagicMock(return_value=[])

        # Operations replaced with plain recording stubs.
        for stub_name in (
            "DeleteWorktree",
            "_InitGitDir",
            "_InitRemote",
            "_InitMRef",
        ):
            setattr(proj, stub_name, mock.MagicMock())
        proj._RemoteFetch = mock.MagicMock(return_value=True)
        return proj

    def _sync_and_expect_no_prune(self, proj):
        """Runs the network half and asserts that no prune was requested."""
        result = proj.Sync_NetworkHalf()

        self.assertTrue(result.success)
        self.assertFalse(getattr(proj, "stateless_prune_needed", False))

    def test_sync_network_half_stateless_prune_needed(self):
        """The network half flags a prune when the project is behind."""
        with utils_for_test.TempGitTree() as tempdir:
            proj = self._get_project(tempdir)
            result = proj.Sync_NetworkHalf()

            self.assertTrue(result.success)
            proj.DeleteWorktree.assert_not_called()
            self.assertTrue(proj.stateless_prune_needed)
            proj._RemoteFetch.assert_called_once()

    def test_sync_local_half_stateless_prune(self):
        """The local half queues reflog expiry followed by a pruning gc."""
        with utils_for_test.TempGitTree() as tempdir:
            proj = self._get_project(tempdir)
            proj.stateless_prune_needed = True

            proj._Checkout = mock.MagicMock()
            proj._InitWorkTree = mock.MagicMock()
            proj._CopyAndLinkFiles = mock.MagicMock()
            proj.IsRebaseInProgress = mock.MagicMock(return_value=False)
            proj.IsCherryPickInProgress = mock.MagicMock(return_value=False)
            proj.GetRevisionId = mock.MagicMock(return_value="1234abcd")

            proj.bare_ref = mock.MagicMock()
            proj.bare_ref.all = {}

            proj.work_git = mock.MagicMock()
            proj.work_git.GetHead.return_value = "5678abcd"

            syncbuf = project.SyncBuffer(proj.config)

            with mock.patch("project.GitCommand") as mock_git_cmd:
                # Every git invocation "succeeds".
                finished_cmd = mock.MagicMock()
                finished_cmd.Wait.return_value = 0
                mock_git_cmd.return_value = finished_cmd

                proj.Sync_LocalHalf(syncbuf)
                syncbuf.Finish()

                self.assertEqual(mock_git_cmd.call_count, 2)
                mock_git_cmd.assert_any_call(
                    proj,
                    ["reflog", "expire", "--expire=all", "--all"],
                    bare=True,
                )
                mock_git_cmd.assert_any_call(
                    proj,
                    ["gc", "--prune=now"],
                    bare=True,
                    capture_stdout=True,
                    capture_stderr=True,
                )

    def test_sync_network_half_stateless_skips_if_stash(self):
        """No prune is requested while a stash exists."""
        with utils_for_test.TempGitTree() as tempdir:
            proj = self._get_project(tempdir)
            proj.HasStash = mock.MagicMock(return_value=True)

            self._sync_and_expect_no_prune(proj)

    def test_sync_network_half_stateless_skips_if_local_commits(self):
        """No prune is requested while HEAD has local-only commits."""
        with utils_for_test.TempGitTree() as tempdir:
            proj = self._get_project(tempdir)
            proj.bare_git.rev_list.return_value = ["1"]

            self._sync_and_expect_no_prune(proj)
|
||||||
|
|||||||
Reference in New Issue
Block a user