Cache bucket content by prefix

When a publishing uses a publish prefix, instead of listing the contents
of the whole bucket under the storage prefix, only list the contents of
the bucket under the storage prefix and publish prefix, and cache it by
publish prefix.
This speeds up publish operations under a prefix.
This commit is contained in:
Ludovico Cavedon
2023-04-11 10:26:29 -07:00
committed by André Roth
parent 01893a492f
commit fad660450c
9 changed files with 136 additions and 55 deletions

View File

@@ -22,7 +22,7 @@ import (
type PublishedStorage struct {
container azblob.ContainerURL
prefix string
pathCache map[string]string
pathCache map[string]map[string]string
}
// Check interface
@@ -174,32 +174,38 @@ func (storage *PublishedStorage) Remove(path string) error {
// LinkFromPool links package file from pool to dist's pool location
//
// publishedDirectory is desired location in pool (like prefix/pool/component/liba/libav/)
// publishedPrefix is desired prefix for the location in the pool.
// publishedRelParh is desired location in pool (like pool/component/liba/libav/)
// sourcePool is instance of aptly.PackagePool
// sourcePath is filepath to package file in package pool
//
// LinkFromPool returns relative path for the published file to be included in package index
func (storage *PublishedStorage) LinkFromPool(prefix string, path string, fileName string, sourcePool aptly.PackagePool,
func (storage *PublishedStorage) LinkFromPool(publishedPrefix, publishedRelPath, fileName string, sourcePool aptly.PackagePool,
sourcePath string, sourceChecksums utils.ChecksumInfo, force bool) error {
publishedDirectory := filepath.Join(prefix, path)
relPath := filepath.Join(publishedDirectory, fileName)
poolPath := filepath.Join(storage.prefix, relPath)
relFilePath := filepath.Join(publishedRelPath, fileName)
prefixRelFilePath := filepath.Join(publishedPrefix, relFilePath)
poolPath := filepath.Join(storage.prefix, prefixRelFilePath)
if storage.pathCache == nil {
paths, md5s, err := storage.internalFilelist("")
storage.pathCache = make(map[string]map[string]string)
}
pathCache := storage.pathCache[publishedPrefix]
if pathCache == nil {
paths, md5s, err := storage.internalFilelist(publishedPrefix)
if err != nil {
return fmt.Errorf("error caching paths under prefix: %s", err)
}
storage.pathCache = make(map[string]string, len(paths))
pathCache = make(map[string]string, len(paths))
for i := range paths {
storage.pathCache[paths[i]] = md5s[i]
pathCache[paths[i]] = md5s[i]
}
storage.pathCache[publishedPrefix] = pathCache
}
destinationMD5, exists := storage.pathCache[relPath]
destinationMD5, exists := pathCache[relFilePath]
sourceMD5 := sourceChecksums.MD5
if exists {
@@ -222,9 +228,9 @@ func (storage *PublishedStorage) LinkFromPool(prefix string, path string, fileNa
}
defer source.Close()
err = storage.putFile(relPath, source, sourceMD5)
err = storage.putFile(prefixRelFilePath, source, sourceMD5)
if err == nil {
storage.pathCache[relPath] = sourceMD5
pathCache[relFilePath] = sourceMD5
} else {
err = errors.Wrap(err, fmt.Sprintf("error uploading %s to %s: %s", sourcePath, storage, poolPath))
}

View File

@@ -300,45 +300,45 @@ func (s *PublishedStorageSuite) TestLinkFromPool(c *C) {
c.Assert(err, IsNil)
// first link from pool
err = s.storage.LinkFromPool(filepath.Join("", "pool", "main", "m/mars-invaders"), "mars-invaders_1.03.deb", pool, src1, cksum1, false)
err = s.storage.LinkFromPool("", filepath.Join("", "pool", "main", "m/mars-invaders"), "mars-invaders_1.03.deb", pool, src1, cksum1, false)
c.Check(err, IsNil)
c.Check(s.GetFile(c, "pool/main/m/mars-invaders/mars-invaders_1.03.deb"), DeepEquals, []byte("Contents"))
// duplicate link from pool
err = s.storage.LinkFromPool(filepath.Join("", "pool", "main", "m/mars-invaders"), "mars-invaders_1.03.deb", pool, src1, cksum1, false)
err = s.storage.LinkFromPool("", filepath.Join("pool", "main", "m/mars-invaders"), "mars-invaders_1.03.deb", pool, src1, cksum1, false)
c.Check(err, IsNil)
c.Check(s.GetFile(c, "pool/main/m/mars-invaders/mars-invaders_1.03.deb"), DeepEquals, []byte("Contents"))
// link from pool with conflict
err = s.storage.LinkFromPool(filepath.Join("", "pool", "main", "m/mars-invaders"), "mars-invaders_1.03.deb", pool, src2, cksum2, false)
err = s.storage.LinkFromPool("", filepath.Join("pool", "main", "m/mars-invaders"), "mars-invaders_1.03.deb", pool, src2, cksum2, false)
c.Check(err, ErrorMatches, ".*file already exists and is different.*")
c.Check(s.GetFile(c, "pool/main/m/mars-invaders/mars-invaders_1.03.deb"), DeepEquals, []byte("Contents"))
// link from pool with conflict and force
err = s.storage.LinkFromPool(filepath.Join("", "pool", "main", "m/mars-invaders"), "mars-invaders_1.03.deb", pool, src2, cksum2, true)
err = s.storage.LinkFromPool("", filepath.Join("pool", "main", "m/mars-invaders"), "mars-invaders_1.03.deb", pool, src2, cksum2, true)
c.Check(err, IsNil)
c.Check(s.GetFile(c, "pool/main/m/mars-invaders/mars-invaders_1.03.deb"), DeepEquals, []byte("Spam"))
// for prefixed storage:
// first link from pool
err = s.prefixedStorage.LinkFromPool(filepath.Join("", "pool", "main", "m/mars-invaders"), "mars-invaders_1.03.deb", pool, src1, cksum1, false)
err = s.prefixedStorage.LinkFromPool("", filepath.Join("pool", "main", "m/mars-invaders"), "mars-invaders_1.03.deb", pool, src1, cksum1, false)
c.Check(err, IsNil)
// 2nd link from pool, providing wrong path for source file
//
// this test should check that file already exists in S3 and skip upload (which would fail if not skipped)
s.prefixedStorage.pathCache = nil
err = s.prefixedStorage.LinkFromPool(filepath.Join("", "pool", "main", "m/mars-invaders"), "mars-invaders_1.03.deb", pool, "wrong-looks-like-pathcache-doesnt-work", cksum1, false)
err = s.prefixedStorage.LinkFromPool("", filepath.Join("pool", "main", "m/mars-invaders"), "mars-invaders_1.03.deb", pool, "wrong-looks-like-pathcache-doesnt-work", cksum1, false)
c.Check(err, IsNil)
c.Check(s.GetFile(c, "lala/pool/main/m/mars-invaders/mars-invaders_1.03.deb"), DeepEquals, []byte("Contents"))
// link from pool with nested file name
err = s.storage.LinkFromPool("dists/jessie/non-free/installer-i386/current/images", "netboot/boot.img.gz", pool, src3, cksum3, false)
err = s.storage.LinkFromPool("", "dists/jessie/non-free/installer-i386/current/images", "netboot/boot.img.gz", pool, src3, cksum3, false)
c.Check(err, IsNil)
c.Check(s.GetFile(c, "dists/jessie/non-free/installer-i386/current/images/netboot/boot.img.gz"), DeepEquals, []byte("Contents"))