Compare commits

..

5 Commits

Author SHA1 Message Date
Josip Sokcevic
db111d3924 sync: Recover from errors during read-tree
When repo is initializing a git repository, it calls `git read-tree`.
During such operation, git is restoring workspace based on the current
index. However, some things can go wrong: a user can run out of disk
space, or, in case of partial clone, user may no longer reach the remote
host. That will leave affected repository in a bad state with partially
checked out workspace. The follow up repo sync won't try to fix such
state.

This change removes .git symlink, which will force the next `repo sync`
to redo Git repository setup.

Bug: b/363171216
Bug: b/390161127
Change-Id: I57db4b6cae0ef21826dc7cede4d3bf02cfc3d955
Reviewed-on: https://gerrit-review.googlesource.com/c/git-repo/+/447801
Reviewed-by: Scott Lee <ddoman@google.com>
Tested-by: Josip Sokcevic <sokcevic@chromium.org>
Commit-Queue: Josip Sokcevic <sokcevic@chromium.org>
2025-01-16 09:19:45 -08:00
Josip Sokcevic
3405446a4e gc: Add repack option
When a repository is partially cloned, no longer needed blobs are never
removed. To reclaim some of disk space, allow user to pass --repack
which affects only repositories with filter=blob:none and if projects
are not shared.

Change-Id: I0608172c9eff82fb8a6b6ef703eb109fedb7a6cc
Reviewed-on: https://gerrit-review.googlesource.com/c/git-repo/+/447722
Commit-Queue: Josip Sokcevic <sokcevic@chromium.org>
Tested-by: Josip Sokcevic <sokcevic@chromium.org>
Reviewed-by: Scott Lee <ddoman@google.com>
2025-01-14 15:17:34 -08:00
Josip Sokcevic
41a27eb854 gc: extract deletion from Execute method
Change-Id: Icef4f28fbdb9658892611def7589f5eba43c952c
Reviewed-on: https://gerrit-review.googlesource.com/c/git-repo/+/447721
Reviewed-by: Scott Lee <ddoman@google.com>
Commit-Queue: Josip Sokcevic <sokcevic@chromium.org>
Tested-by: Josip Sokcevic <sokcevic@chromium.org>
2025-01-14 12:33:45 -08:00
Josip Sokcevic
d93fe60e89 sync: Handle KeyboardInterrupt during checkout
KeyboardInterrupt is handled during NetworkHalf. This patch handles
KeyboardInterrupt during LocalHalf.

Bug: b/372069163
Change-Id: I26847f7ca3cdf1fe57b265b4f6b18cc8102d2921
Reviewed-on: https://gerrit-review.googlesource.com/c/git-repo/+/447401
Tested-by: Josip Sokcevic <sokcevic@chromium.org>
Reviewed-by: Gavin Mak <gavinmak@google.com>
2025-01-08 13:36:52 -08:00
Josip Sokcevic
61224d01fa sync: skip network half on repo upgrade
When repo upgrades itself, it will restart itself and rerun sync
command. At that point, we know that network half is already done and we
can just proceed with local half.

R=ddoman@google.com

Bug: b/377567091
Change-Id: I77205b1f2df19891597347d55283a617de3c6634
Reviewed-on: https://gerrit-review.googlesource.com/c/git-repo/+/446201
Reviewed-by: Scott Lee <ddoman@google.com>
Tested-by: Josip Sokcevic <sokcevic@chromium.org>
Commit-Queue: Josip Sokcevic <sokcevic@chromium.org>
2024-12-18 11:49:17 -08:00
3 changed files with 188 additions and 14 deletions

View File

@@ -3443,11 +3443,18 @@ class Project:
# Finish checking out the worktree.
cmd = ["read-tree", "--reset", "-u", "-v", HEAD]
if GitCommand(self, cmd).Wait() != 0:
raise GitError(
"Cannot initialize work tree for " + self.name,
project=self.name,
)
try:
if GitCommand(self, cmd).Wait() != 0:
raise GitError(
"Cannot initialize work tree for " + self.name,
project=self.name,
)
except Exception as e:
# Something went wrong with read-tree (perhaps fetching
# missing blobs), so remove .git to avoid half initialized
# workspace from which repo can't recover on its own.
platform_utils.remove(dotgit)
raise e
if submodules:
self._SyncSubmodules(quiet=True)

View File

@@ -13,16 +13,18 @@
# limitations under the License.
import os
from typing import Set
from typing import List, Set
from command import Command
from git_command import GitCommand
import platform_utils
from progress import Progress
from project import Project
class Gc(Command):
COMMON = True
helpSummary = "Cleaning up internal repo state."
helpSummary = "Cleaning up internal repo and Git state."
helpUsage = """
%prog
"""
@@ -43,6 +45,13 @@ class Gc(Command):
action="store_true",
help="answer yes to all safe prompts",
)
p.add_option(
"--repack",
default=False,
action="store_true",
help="repack all projects that use partial clone with "
"filter=blob:none",
)
def _find_git_to_delete(
self, to_keep: Set[str], start_dir: str
@@ -64,10 +73,7 @@ class Gc(Command):
return to_delete
def Execute(self, opt, args):
projects = self.GetProjects(
args, all_manifests=not opt.this_manifest_only
)
def delete_unused_projects(self, projects: List[Project], opt):
print(f"Scanning filesystem under {self.repodir}...")
project_paths = set()
@@ -90,11 +96,11 @@ class Gc(Command):
if not to_delete:
print("Nothing to clean up.")
return
return 0
print("Identified the following projects are no longer used:")
print("\n".join(to_delete))
print("\n")
print("")
if not opt.yes:
print(
"If you proceed, any local commits in those projects will be "
@@ -125,3 +131,162 @@ class Gc(Command):
platform_utils.rmtree(tmp_path)
pm.update(msg=path)
pm.end()
return 0
def _generate_promisor_files(self, pack_dir: str):
"""Generates promisor files for all pack files in the given directory.
Promisor files are empty files with the same name as the corresponding
pack file but with the ".promisor" extension. They are used by Git.
"""
for root, _, files in platform_utils.walk(pack_dir):
for file in files:
if not file.endswith(".pack"):
continue
with open(os.path.join(root, f"{file[:-4]}promisor"), "w"):
pass
def repack_projects(self, projects: List[Project], opt):
repack_projects = []
# Find all projects eligible for repacking:
# - can't be shared
# - have a specific fetch filter
for project in projects:
if project.config.GetBoolean("extensions.preciousObjects"):
continue
if not project.clone_depth:
continue
if project.manifest.CloneFilterForDepth != "blob:none":
continue
repack_projects.append(project)
if opt.dryrun:
print(f"Would have repacked {len(repack_projects)} projects.")
return 0
pm = Progress(
"Repacking (this will take a while)",
len(repack_projects),
delay=False,
quiet=opt.quiet,
show_elapsed=True,
elide=True,
)
for project in repack_projects:
pm.update(msg=f"{project.name}")
pack_dir = os.path.join(project.gitdir, "tmp_repo_repack")
if os.path.isdir(pack_dir):
platform_utils.rmtree(pack_dir)
os.mkdir(pack_dir)
# Prepare workspace for repacking - remove all unreachable refs and
# their objects.
GitCommand(
project,
["reflog", "expire", "--expire-unreachable=all"],
verify_command=True,
).Wait()
pm.update(msg=f"{project.name} | gc", inc=0)
GitCommand(
project,
["gc"],
verify_command=True,
).Wait()
# Get all objects that are reachable from the remote, and pack them.
pm.update(msg=f"{project.name} | generating list of objects", inc=0)
remote_objects_cmd = GitCommand(
project,
[
"rev-list",
"--objects",
f"--remotes={project.remote.name}",
"--filter=blob:none",
],
capture_stdout=True,
verify_command=True,
)
# Get all local objects and pack them.
local_head_objects_cmd = GitCommand(
project,
["rev-list", "--objects", "HEAD^{tree}"],
capture_stdout=True,
verify_command=True,
)
local_objects_cmd = GitCommand(
project,
[
"rev-list",
"--objects",
"--all",
"--reflog",
"--indexed-objects",
"--not",
f"--remotes={project.remote.name}",
],
capture_stdout=True,
verify_command=True,
)
remote_objects_cmd.Wait()
pm.update(msg=f"{project.name} | remote repack", inc=0)
GitCommand(
project,
["pack-objects", os.path.join(pack_dir, "pack")],
input=remote_objects_cmd.stdout,
capture_stderr=True,
capture_stdout=True,
verify_command=True,
).Wait()
# create promisor file for each pack file
self._generate_promisor_files(pack_dir)
local_head_objects_cmd.Wait()
local_objects_cmd.Wait()
pm.update(msg=f"{project.name} | local repack", inc=0)
GitCommand(
project,
["pack-objects", os.path.join(pack_dir, "pack")],
input=local_head_objects_cmd.stdout + local_objects_cmd.stdout,
capture_stderr=True,
capture_stdout=True,
verify_command=True,
).Wait()
# Swap the old pack directory with the new one.
platform_utils.rename(
os.path.join(project.objdir, "objects", "pack"),
os.path.join(project.objdir, "objects", "pack_old"),
)
platform_utils.rename(
pack_dir,
os.path.join(project.objdir, "objects", "pack"),
)
platform_utils.rmtree(
os.path.join(project.objdir, "objects", "pack_old")
)
pm.end()
return 0
def Execute(self, opt, args):
projects: List[Project] = self.GetProjects(
args, all_manifests=not opt.this_manifest_only
)
ret = self.delete_unused_projects(projects, opt)
if ret != 0:
return ret
if not opt.repack:
return
return self.repack_projects(projects, opt)

View File

@@ -1058,6 +1058,8 @@ later is required to fix a server side protocol bug.
verbose=verbose,
)
success = syncbuf.Finish()
except KeyboardInterrupt:
logger.error("Keyboard interrupt while processing %s", project.name)
except GitError as e:
logger.error(
"error.GitError: Cannot checkout %s: %s", project.name, e
@@ -1831,7 +1833,7 @@ later is required to fix a server side protocol bug.
self._fetch_times = _FetchTimes(manifest)
self._local_sync_state = LocalSyncState(manifest)
if not opt.local_only:
if not opt.local_only and not opt.repo_upgraded:
with multiprocessing.Manager() as manager:
with ssh.ProxyManager(manager) as ssh_proxy:
# Initialize the socket dir once in the parent.