mirror of
https://gerrit.googlesource.com/git-repo
synced 2026-01-11 17:10:33 +00:00
sync: Add heuristic warning for bloated shallow repositories
For clone-depth="1" repositories that are dirty or have local commits, add a check at the end of sync to detect excessive git object accumulation. This prevents silent performance degradation and disk exhaustion in large prebuilts repos, where automatic GC is typically disabled.
From https://gerrit.googlesource.com/git-repo/+/7f87c54043ce9a35a5bb60a09ee846f9d7070352
Bug: 379111283
Change-Id: I376f38e1555cc6e906d852f6e63dc1c8f6331b4f
Reviewed-on: https://gerrit-review.googlesource.com/c/git-repo/+/534701
Commit-Queue: Gavin Mak <gavinmak@google.com>
Reviewed-by: Mike Frysinger <vapier@google.com>
Tested-by: Gavin Mak <gavinmak@google.com>
This commit is contained in:
104
subcmds/sync.py
104
subcmds/sync.py
@@ -87,6 +87,10 @@ _ONE_DAY_S = 24 * 60 * 60
|
||||
|
||||
_REPO_ALLOW_SHALLOW = os.environ.get("REPO_ALLOW_SHALLOW")
|
||||
|
||||
# Heuristic thresholds for flagging a shallow project as bloated. They are
# compared against the "packs", "size-pack" and "size-garbage" fields parsed
# from `git count-objects -v`.
# A project counts as fragmented only when BOTH the pack count and the total
# pack size exceed their thresholds.
_BLOAT_PACK_COUNT_THRESHOLD = 10
|
||||
_BLOAT_SIZE_PACK_THRESHOLD_KB = 10 * 1024 * 1024 # 10 GiB in KiB
|
||||
# Exceeding the garbage-size threshold alone is enough to flag a project.
_BLOAT_SIZE_GARBAGE_THRESHOLD_KB = 1 * 1024 * 1024 # 1 GiB in KiB
|
||||
|
||||
logger = RepoLogger(__file__)
|
||||
|
||||
|
||||
@@ -1371,6 +1375,104 @@ later is required to fix a server side protocol bug.
|
||||
t.join()
|
||||
pm.end()
|
||||
|
||||
@classmethod
def _CheckOneBloatedProject(cls, project_index: int) -> Optional[str]:
    """Report whether one project's object store looks bloated.

    A project is only measured when it exists, has a worktree, and is
    either dirty (untracked files count) or has a HEAD that differs from
    the manifest revision. For such projects the output of
    `git count-objects -v` is parsed and compared against the module-level
    bloat thresholds.

    Args:
        project_index: Position of the project in the "projects" list
            stored in the parallel context.

    Returns:
        The project's name when it is considered bloated, otherwise None.
        None is also returned when any of the git queries raises.
    """
    candidate = cls.get_parallel_context()["projects"][project_index]

    if not (candidate.Exists and candidate.worktree):
        return None

    # Restrict the check to checkouts that are dirty or carry local
    # commits: those cannot simply be cloned afresh, so they keep
    # accumulating garbage.
    try:
        dirty = candidate.IsDirty(consider_untracked=True)

        expected_rev = candidate.GetRevisionId(candidate.bare_ref.all)
        current_rev = candidate.work_git.rev_parse(HEAD)
        diverged = expected_rev != current_rev

        if not dirty and not diverged:
            return None

        raw_stats = candidate.bare_git.count_objects("-v")
    except Exception:
        # Best effort: a failure while querying git means "not flagged".
        return None

    # Collect every "key: <integer>" line; anything else is ignored.
    counters = {}
    for entry in raw_stats.splitlines():
        name, sep, number = entry.partition(": ")
        if not sep:
            continue
        try:
            counters[name.strip()] = int(number.strip())
        except ValueError:
            continue

    # Fragmentation requires both many packs and a large total pack size.
    fragmented = (
        counters.get("packs", 0) > _BLOAT_PACK_COUNT_THRESHOLD
        and counters.get("size-pack", 0) > _BLOAT_SIZE_PACK_THRESHOLD_KB
    )
    too_much_garbage = (
        counters.get("size-garbage", 0) > _BLOAT_SIZE_GARBAGE_THRESHOLD_KB
    )

    return candidate.name if (fragmented or too_much_garbage) else None
|
||||
|
||||
def _CheckForBloatedProjects(self, projects, opt):
    """Warn about shallow projects whose object stores look bloated.

    Every project with a clone depth set is handed to
    _CheckOneBloatedProject in parallel. That check only measures projects
    that are dirty or whose HEAD differs from the manifest revision, using
    'git count-objects -v'. Each project name it returns gets a warning
    that is recorded in the git event log and emitted through the logger.

    Args:
        projects: Candidate projects; those without a clone depth are
            skipped.
        opt: Parsed command options; `quiet` and `jobs` are read.
    """
    shallow = [p for p in projects if p.clone_depth]
    if not shallow:
        return

    flagged = []
    progress = Progress(
        "Checking for bloat", len(shallow), delay=False, quiet=opt.quiet
    )

    def _ProcessResults(pool, pm, results):
        # Advance the progress bar once per result, flagged or not.
        for name in results:
            if name:
                flagged.append(name)
            pm.update(msg="")

    with self.ParallelContext():
        # Workers receive only an index and look the project up here.
        self.get_parallel_context()["projects"] = shallow
        self.ExecuteInParallel(
            opt.jobs,
            self._CheckOneBloatedProject,
            range(len(shallow)),
            callback=_ProcessResults,
            output=progress,
            chunksize=1,
        )
    progress.end()

    for name in flagged:
        message = (
            f'warning: Project "{name}" is accumulating '
            'unoptimized data. Please run "repo sync --auto-gc" or '
            '"repo gc --repack" to clean up.'
        )
        self.git_event_log.ErrorEvent(message)
        logger.warning(message)
|
||||
|
||||
def _UpdateRepoProject(self, opt, manifest, errors):
|
||||
"""Fetch the repo project and check for updates."""
|
||||
if opt.local_only:
|
||||
@@ -2002,6 +2104,8 @@ later is required to fix a server side protocol bug.
|
||||
"experience, sync the entire tree."
|
||||
)
|
||||
|
||||
self._CheckForBloatedProjects(all_projects, opt)
|
||||
|
||||
if not opt.quiet:
|
||||
print("repo sync has finished successfully.")
|
||||
|
||||
|
||||
Reference in New Issue
Block a user