1
0
mirror of https://git.yoctoproject.org/poky synced 2026-06-03 01:40:07 +00:00

bitbake: fetch2/gitsm.py: Rework the git submodule fetcher

The prior fetcher did not know how to work with MIRRORS, and did not
honor BB_NO_NETWORK and similar.

The new fetcher approach recursively calls 'gitsm' download on each
submodule detected.  This ensures that it will go through the
standard download process.

Each downloaded submodule is then 'attached' to the original download in
the 'modules' directory.  This mimics the behavior of:

    git submodule init

but there is no chance it will contact the network without permission.

It then corrects upstream reference URIs.

The unpack step simply copies the items from the downloads to the destdir.
Once copied, the submodules are connected and we then run:

    git submodule update

According to the git documentation, git submodule init can and will modify
the project configuration and may connect to the network.  Doing the
work manually prevents this.  (This manual process is allowed based
on my reading of the documentation.)

See: https://git-scm.com/book/en/v2/Git-Tools-Submodules

The small change to the existing test is due to this new code always assuming
the code is from a remote system, and not a 'local' repository.  If this
assumption proves to be incorrect -- code will need to be added to deal
with local repositories without an upstream URI.

(Bitbake rev: 9c6b39adf9781fa6745f48913a97c859fa37eb5b)

Signed-off-by: Mark Hatle <mark.hatle@windriver.com>
Signed-off-by: Richard Purdie <richard.purdie@linuxfoundation.org>
This commit is contained in:
Mark Hatle
2018-09-25 13:15:25 -04:00
committed by Richard Purdie
parent f61ef5b454
commit 5cd00e3e53
2 changed files with 153 additions and 117 deletions
+150 -117
View File
@@ -34,6 +34,8 @@ import bb
from bb.fetch2.git import Git from bb.fetch2.git import Git
from bb.fetch2 import runfetchcmd from bb.fetch2 import runfetchcmd
from bb.fetch2 import logger from bb.fetch2 import logger
from bb.fetch2 import Fetch
from bb.fetch2 import BBFetchException
class GitSM(Git): class GitSM(Git):
def supports(self, ud, d): def supports(self, ud, d):
@@ -42,96 +44,66 @@ class GitSM(Git):
""" """
return ud.type in ['gitsm'] return ud.type in ['gitsm']
def uses_submodules(self, ud, d, wd): def update_submodules(self, ud, d):
submodules = []
paths = {}
uris = {}
local_paths = {}
for name in ud.names: for name in ud.names:
try: try:
runfetchcmd("%s show %s:.gitmodules" % (ud.basecmd, ud.revisions[name]), d, quiet=True, workdir=wd) gitmodules = runfetchcmd("%s show %s:.gitmodules" % (ud.basecmd, ud.revisions[name]), d, quiet=True, workdir=ud.clonedir)
return True except:
except bb.fetch.FetchError: # No submodules to update
pass continue
return False
def _set_relative_paths(self, repopath): module = ""
""" for line in gitmodules.splitlines():
Fix submodule paths to be relative instead of absolute,
so that when we move the repo it doesn't break
(In Git 1.7.10+ this is done automatically)
"""
submodules = []
with open(os.path.join(repopath, '.gitmodules'), 'r') as f:
for line in f.readlines():
if line.startswith('[submodule'): if line.startswith('[submodule'):
submodules.append(line.split('"')[1]) module = line.split('"')[1]
submodules.append(module)
elif module and line.strip().startswith('path'):
path = line.split('=')[1].strip()
paths[module] = path
elif module and line.strip().startswith('url'):
url = line.split('=')[1].strip()
uris[module] = url
for module in submodules: for module in submodules:
repo_conf = os.path.join(repopath, module, '.git') module_hash = runfetchcmd("%s ls-tree -z -d %s %s" % (ud.basecmd, ud.revisions[name], paths[module]), d, quiet=True, workdir=ud.clonedir)
if os.path.exists(repo_conf): module_hash = module_hash.split()[2]
with open(repo_conf, 'r') as f:
lines = f.readlines()
newpath = ''
for i, line in enumerate(lines):
if line.startswith('gitdir:'):
oldpath = line.split(': ')[-1].rstrip()
if oldpath.startswith('/'):
newpath = '../' * (module.count('/') + 1) + '.git/modules/' + module
lines[i] = 'gitdir: %s\n' % newpath
break
if newpath:
with open(repo_conf, 'w') as f:
for line in lines:
f.write(line)
repo_conf2 = os.path.join(repopath, '.git', 'modules', module, 'config') # Build new SRC_URI
if os.path.exists(repo_conf2): proto = uris[module].split(':', 1)[0]
with open(repo_conf2, 'r') as f: url = uris[module].replace('%s:' % proto, 'gitsm:', 1)
lines = f.readlines() url += ';protocol=%s' % proto
newpath = '' url += ";name=%s" % module
for i, line in enumerate(lines): url += ";qbareclone=1;nocheckout=1"
if line.lstrip().startswith('worktree = '):
oldpath = line.split(' = ')[-1].rstrip()
if oldpath.startswith('/'):
newpath = '../' * (module.count('/') + 3) + module
lines[i] = '\tworktree = %s\n' % newpath
break
if newpath:
with open(repo_conf2, 'w') as f:
for line in lines:
f.write(line)
def update_submodules(self, ud, d, allow_network): ld = d.createCopy()
# We have to convert bare -> full repo, do the submodule bit, then convert back # Not necessary to set SRC_URI, since we're passing the URI to
tmpclonedir = ud.clonedir + ".tmp" # Fetch.
gitdir = tmpclonedir + os.sep + ".git" #ld.setVar('SRC_URI', url)
bb.utils.remove(tmpclonedir, True) ld.setVar('SRCREV_%s' % module, module_hash)
os.mkdir(tmpclonedir)
os.rename(ud.clonedir, gitdir)
runfetchcmd("sed " + gitdir + "/config -i -e 's/bare.*=.*true/bare = false/'", d)
runfetchcmd(ud.basecmd + " reset --hard", d, workdir=tmpclonedir)
runfetchcmd(ud.basecmd + " checkout -f " + ud.revisions[ud.names[0]], d, workdir=tmpclonedir)
try: # Workaround for issues with SRCPV/SRCREV_FORMAT errors
if allow_network: # error refer to 'multiple' repositories. Only the repository
fetch_flags = "" # in the original SRC_URI actually matters...
else: ld.setVar('SRCPV', d.getVar('SRCPV'))
fetch_flags = "--no-fetch" ld.setVar('SRCREV_FORMAT', module)
# The 'git submodule sync' sandwiched between two successive 'git submodule update' commands is newfetch = Fetch([url], ld, cache=False)
# intentional. See the notes on the similar construction in download() for an explanation. newfetch.download()
runfetchcmd("%(basecmd)s submodule update --init --recursive %(fetch_flags)s || (%(basecmd)s submodule sync --recursive && %(basecmd)s submodule update --init --recursive %(fetch_flags)s)" % {'basecmd': ud.basecmd, 'fetch_flags' : fetch_flags}, d, workdir=tmpclonedir) local_paths[module] = newfetch.localpath(url)
except bb.fetch.FetchError:
if allow_network: # Correct the submodule references to the local download version...
raise runfetchcmd("%(basecmd)s config submodule.%(module)s.url %(url)s" % {'basecmd': ud.basecmd, 'module': module, 'url' : local_paths[module]}, d, workdir=ud.clonedir)
else: try:
# This method was called as a probe to see whether the submodule history os.mkdir(os.path.join(ud.clonedir, 'modules'))
# is complete enough to allow the current working copy to have its except OSError:
# modules filled in. It's not, so swallow up the exception and report pass
# the negative result. if not os.path.exists(os.path.join(ud.clonedir, 'modules', paths[module])):
return False os.symlink(local_paths[module], os.path.join(ud.clonedir, 'modules', paths[module]))
finally:
self._set_relative_paths(tmpclonedir)
runfetchcmd("sed " + gitdir + "/config -i -e 's/bare.*=.*false/bare = true/'", d, workdir=tmpclonedir)
os.rename(gitdir, ud.clonedir,)
bb.utils.remove(tmpclonedir, True)
return True return True
@@ -147,56 +119,117 @@ class GitSM(Git):
# Now check that the submodule histories are new enough. The git-submodule command doesn't have # Now check that the submodule histories are new enough. The git-submodule command doesn't have
# any clean interface for doing this aside from just attempting the checkout (with network # any clean interface for doing this aside from just attempting the checkout (with network
# fetched disabled). # fetched disabled).
return not self.update_submodules(ud, d, allow_network=False) return not self.update_submodules(ud, d)
def download(self, ud, d): def download(self, ud, d):
Git.download(self, ud, d) Git.download(self, ud, d)
if not ud.shallow or ud.localpath != ud.fullshallow: if not ud.shallow or ud.localpath != ud.fullshallow:
submodules = self.uses_submodules(ud, d, ud.clonedir) self.update_submodules(ud, d)
if submodules:
self.update_submodules(ud, d, allow_network=True) def copy_submodules(self, submodules, ud, destdir, d):
if ud.bareclone:
repo_conf = destdir
else:
repo_conf = os.path.join(destdir, '.git')
if submodules and not os.path.exists(os.path.join(repo_conf, 'modules')):
os.mkdir(os.path.join(repo_conf, 'modules'))
for module in submodules:
srcpath = os.path.join(ud.clonedir, 'modules', module)
modpath = os.path.join(repo_conf, 'modules', module)
if os.path.exists(srcpath):
if os.path.exists(os.path.join(srcpath, '.git')):
srcpath = os.path.join(srcpath, '.git')
target = modpath
if os.path.exists(modpath):
target = os.path.dirname(modpath)
runfetchcmd("cp -fpLR %s %s" % (srcpath, target), d)
elif os.path.exists(modpath):
# Module already exists, likely unpacked from a shallow mirror clone
pass
else:
# This is fatal, as we do NOT want git-submodule to hit the network
raise bb.fetch2.FetchError('Submodule %s does not exist in %s or %s.' % (module, srcpath, modpath))
def clone_shallow_local(self, ud, dest, d): def clone_shallow_local(self, ud, dest, d):
super(GitSM, self).clone_shallow_local(ud, dest, d) super(GitSM, self).clone_shallow_local(ud, dest, d)
runfetchcmd('cp -fpPRH "%s/modules" "%s/"' % (ud.clonedir, os.path.join(dest, '.git')), d) # Copy over the submodules' fetched histories too.
repo_conf = os.path.join(dest, '.git')
submodules = []
for name in ud.names:
try:
gitmodules = runfetchcmd("%s show %s:.gitmodules" % (ud.basecmd, ud.revision), d, quiet=True, workdir=dest)
except:
# No submodules to update
continue
for line in gitmodules.splitlines():
if line.startswith('[submodule'):
module = line.split('"')[1]
submodules.append(module)
self.copy_submodules(submodules, ud, dest, d)
def unpack(self, ud, destdir, d): def unpack(self, ud, destdir, d):
Git.unpack(self, ud, destdir, d) Git.unpack(self, ud, destdir, d)
if self.uses_submodules(ud, d, ud.destdir): # Copy over the submodules' fetched histories too.
runfetchcmd(ud.basecmd + " checkout " + ud.revisions[ud.names[0]], d, workdir=ud.destdir) if ud.bareclone:
repo_conf = ud.destdir
else:
repo_conf = os.path.join(ud.destdir, '.git')
# Copy over the submodules' fetched histories too. submodules = []
if ud.bareclone: paths = {}
repo_conf = ud.destdir uris = {}
else: local_paths = {}
repo_conf = os.path.join(ud.destdir, '.git') for name in ud.names:
try:
gitmodules = runfetchcmd("%s show HEAD:.gitmodules" % (ud.basecmd), d, quiet=True, workdir=ud.destdir)
except:
# No submodules to update
continue
if os.path.exists(ud.clonedir): module = ""
# This is not a copy unpacked from a shallow mirror clone. So for line in gitmodules.splitlines():
# the manual intervention to populate the .git/modules done if line.startswith('[submodule'):
# in clone_shallow_local() won't have been done yet. module = line.split('"')[1]
runfetchcmd("cp -fpPRH %s %s" % (os.path.join(ud.clonedir, 'modules'), repo_conf), d) submodules.append(module)
fetch_flags = "--no-fetch" elif module and line.strip().startswith('path'):
elif os.path.exists(os.path.join(repo_conf, 'modules')): path = line.split('=')[1].strip()
# Unpacked from a shallow mirror clone. Manual population of paths[module] = path
# .git/modules is already done. elif module and line.strip().startswith('url'):
fetch_flags = "--no-fetch" url = line.split('=')[1].strip()
else: uris[module] = url
# This isn't fatal; git-submodule will just fetch it
# during do_unpack().
fetch_flags = ""
bb.error("submodule history not retrieved during do_fetch()")
# Careful not to hit the network during unpacking; all history should already self.copy_submodules(submodules, ud, ud.destdir, d)
# be fetched.
# for module in submodules:
# The repeated attempts to do the submodule initialization sandwiched around a sync to srcpath = os.path.join(ud.clonedir, 'modules', module)
# install the correct remote URLs into the submodules' .git/config metadata are deliberate. modpath = os.path.join(repo_conf, 'modules', module)
# Bad remote URLs are leftover in the modules' .git/config files from the unpack of bare
# clone tarballs and an initial 'git submodule update' is necessary to prod them back to # Determine (from the submodule) the correct url to reference
# enough life so that the 'git submodule sync' realizes the existing module .git/config try:
# files exist to be updated. output = runfetchcmd("%(basecmd)s config remote.origin.url" % {'basecmd': ud.basecmd}, d, workdir=modpath)
runfetchcmd("%(basecmd)s submodule update --init --recursive %(fetch_flags)s || (%(basecmd)s submodule sync --recursive && %(basecmd)s submodule update --init --recursive %(fetch_flags)s)" % {'basecmd': ud.basecmd, 'fetch_flags': fetch_flags}, d, workdir=ud.destdir) except bb.fetch2.FetchError as e:
# No remote url defined in this submodule
continue
local_paths[module] = output
# Setup the local URL properly (like git submodule init or sync would do...)
runfetchcmd("%(basecmd)s config submodule.%(module)s.url %(url)s" % {'basecmd': ud.basecmd, 'module': module, 'url' : local_paths[module]}, d, workdir=ud.destdir)
# Ensure the submodule repository is NOT set to bare, since we're checking it out...
runfetchcmd("%s config core.bare false" % (ud.basecmd), d, quiet=True, workdir=modpath)
if submodules:
# Run submodule update, this sets up the directories -- without touching the config
runfetchcmd("%s submodule update --no-fetch" % (ud.basecmd), d, quiet=True, workdir=ud.destdir)
+3
View File
@@ -1344,6 +1344,9 @@ class GitShallowTest(FetcherTest):
smdir = os.path.join(self.tempdir, 'gitsubmodule') smdir = os.path.join(self.tempdir, 'gitsubmodule')
bb.utils.mkdirhier(smdir) bb.utils.mkdirhier(smdir)
self.git('init', cwd=smdir) self.git('init', cwd=smdir)
# Make this look like it was cloned from a remote...
self.git('config --add remote.origin.url "%s"' % smdir, cwd=smdir)
self.git('config --add remote.origin.fetch "+refs/heads/*:refs/remotes/origin/*"', cwd=smdir)
self.add_empty_file('asub', cwd=smdir) self.add_empty_file('asub', cwd=smdir)
self.git('submodule init', cwd=self.srcdir) self.git('submodule init', cwd=self.srcdir)