subcmds: reduce multiprocessing serialization overhead

Follow the same approach as 39ffd9977e to reduce serialization overhead.

The benchmarks below were run with 2.7k projects on my workstation
(warm cache). Git tracing was disabled for the benchmarks.

(seconds)              | v2.48 | v2.48 | this CL | this CL
                       |       |  -j32 |         |    -j32
-----------------------------------------------------------
with clean tree state:
branches (none)        |   5.6 |   5.9 |    1.0  |    0.9
status (clean)         |  21.3 |   9.4 |   19.4  |    4.7
diff (none)            |   7.6 |   7.2 |    5.7  |    2.2
prune (none)           |   5.7 |   6.1 |    1.3  |    1.2
abandon (none)         |  19.4 |  18.6 |    0.9  |    0.8
upload (none)          |  19.7 |  18.7 |    0.9  |    0.8
forall -c true         |   7.5 |   7.6 |    0.6  |    0.6
forall -c "git log -1" |  11.3 |  11.1 |    0.6  |    0.6

with branches:
start BRANCH --all     |  21.9 |  20.3 |   13.6  |    2.6
checkout BRANCH        |  29.1 |  27.8 |    1.1  |    1.0
branches (2)           |  28.0 |  28.6 |    1.5  |    1.3
abandon BRANCH         |  29.2 |  27.5 |    9.7  |    2.2

Bug: b/371638995
Change-Id: I53989a3d1e43063587b3f52f852b1c2c56b49412
Reviewed-on: https://gerrit-review.googlesource.com/c/git-repo/+/440221
Reviewed-by: Josip Sokcevic <sokcevic@google.com>
Tested-by: Kuang-che Wu <kcwu@google.com>
Commit-Queue: Kuang-che Wu <kcwu@google.com>
This commit is contained in:
Kuang-che Wu
2024-10-22 21:04:41 +08:00
committed by LUCI
parent 39ffd9977e
commit 8da4861b38
11 changed files with 230 additions and 174 deletions
+27 -22
View File
@@ -23,7 +23,6 @@ from error import GitError
from error import InvalidArgumentsError
from error import SilentRepoExitError
from git_command import GitCommand
from project import Project
from repo_logging import RepoLogger
@@ -40,7 +39,7 @@ class GrepColoring(Coloring):
class ExecuteOneResult(NamedTuple):
"""Result from an execute instance."""
project: Project
project_idx: int
rc: int
stdout: str
stderr: str
@@ -262,8 +261,10 @@ contain a line that matches both expressions:
help="Show only file names not containing matching lines",
)
def _ExecuteOne(self, cmd_argv, project):
@classmethod
def _ExecuteOne(cls, cmd_argv, project_idx):
"""Process one project."""
project = cls.get_parallel_context()["projects"][project_idx]
try:
p = GitCommand(
project,
@@ -274,7 +275,7 @@ contain a line that matches both expressions:
verify_command=True,
)
except GitError as e:
return ExecuteOneResult(project, -1, None, str(e), e)
return ExecuteOneResult(project_idx, -1, None, str(e), e)
try:
error = None
@@ -282,10 +283,12 @@ contain a line that matches both expressions:
except GitError as e:
rc = 1
error = e
return ExecuteOneResult(project, rc, p.stdout, p.stderr, error)
return ExecuteOneResult(project_idx, rc, p.stdout, p.stderr, error)
@staticmethod
def _ProcessResults(full_name, have_rev, opt, _pool, out, results):
def _ProcessResults(
full_name, have_rev, opt, projects, _pool, out, results
):
git_failed = False
bad_rev = False
have_match = False
@@ -293,9 +296,10 @@ contain a line that matches both expressions:
errors = []
for result in results:
project = projects[result.project_idx]
if result.rc < 0:
git_failed = True
out.project("--- project %s ---" % _RelPath(result.project))
out.project("--- project %s ---" % _RelPath(project))
out.nl()
out.fail("%s", result.stderr)
out.nl()
@@ -311,9 +315,7 @@ contain a line that matches both expressions:
):
bad_rev = True
else:
out.project(
"--- project %s ---" % _RelPath(result.project)
)
out.project("--- project %s ---" % _RelPath(project))
out.nl()
out.fail("%s", result.stderr.strip())
out.nl()
@@ -331,13 +333,13 @@ contain a line that matches both expressions:
rev, line = line.split(":", 1)
out.write("%s", rev)
out.write(":")
out.project(_RelPath(result.project))
out.project(_RelPath(project))
out.write("/")
out.write("%s", line)
out.nl()
elif full_name:
for line in r:
out.project(_RelPath(result.project))
out.project(_RelPath(project))
out.write("/")
out.write("%s", line)
out.nl()
@@ -381,16 +383,19 @@ contain a line that matches both expressions:
cmd_argv.extend(opt.revision)
cmd_argv.append("--")
git_failed, bad_rev, have_match, errors = self.ExecuteInParallel(
opt.jobs,
functools.partial(self._ExecuteOne, cmd_argv),
projects,
callback=functools.partial(
self._ProcessResults, full_name, have_rev, opt
),
output=out,
ordered=True,
)
with self.ParallelContext():
self.get_parallel_context()["projects"] = projects
git_failed, bad_rev, have_match, errors = self.ExecuteInParallel(
opt.jobs,
functools.partial(self._ExecuteOne, cmd_argv),
range(len(projects)),
callback=functools.partial(
self._ProcessResults, full_name, have_rev, opt, projects
),
output=out,
ordered=True,
chunksize=1,
)
if git_failed:
raise GrepCommandError(