mirror of
https://github.com/aptly-dev/aptly.git
synced 2026-04-20 19:38:39 +00:00
Improve publish cleanup perf when sources share most of their packages
The cleanup phase needs to list out all the files in each component in order to determine what's still in use. When there's a large number of sources (e.g. from having many snapshots), the time spent just loading the package information becomes substantial. However, in many cases, most of the packages being loaded are actually shared across the sources; if you're taking frequent snapshots, for instance, most of the packages in each snapshot will be the same as other snapshots. In these cases, re-reading the packages repeatedly is just a waste of time. To improve this, we maintain a list of refs that we know were processed for each component. When listing the refs from a source, only the ones that have not yet been processed will be examined. Some tests were also added specifically to check listing the files in a component. With this change, listing the files in components on a copy of our production database went from >10 minutes to ~10 seconds, and the newly added benchmark went from ~300ms to ~43ms. Signed-off-by: Ryan Gonzalez <ryan.gonzalez@collabora.com>
This commit is contained in:
committed by
André Roth
parent
5636a9990b
commit
8cb1236a8c
@@ -1138,18 +1138,10 @@ func (collection *PublishedRepoCollection) Len() int {
|
||||
return len(collection.list)
|
||||
}
|
||||
|
||||
// CleanupPrefixComponentFiles removes all unreferenced files in published storage under prefix/component pair
|
||||
func (collection *PublishedRepoCollection) CleanupPrefixComponentFiles(prefix string, components []string,
|
||||
publishedStorage aptly.PublishedStorage, collectionFactory *CollectionFactory, progress aptly.Progress) error {
|
||||
|
||||
collection.loadList()
|
||||
|
||||
var err error
|
||||
func (collection *PublishedRepoCollection) listReferencedFilesByComponent(prefix string, components []string,
|
||||
collectionFactory *CollectionFactory, progress aptly.Progress) (map[string][]string, error) {
|
||||
referencedFiles := map[string][]string{}
|
||||
|
||||
if progress != nil {
|
||||
progress.Printf("Cleaning up prefix %#v components %s...\n", prefix, strings.Join(components, ", "))
|
||||
}
|
||||
processedComponentRefs := map[string]*PackageRefList{}
|
||||
|
||||
for _, r := range collection.list {
|
||||
if r.Prefix == prefix {
|
||||
@@ -1168,16 +1160,28 @@ func (collection *PublishedRepoCollection) CleanupPrefixComponentFiles(prefix st
|
||||
continue
|
||||
}
|
||||
|
||||
err = collection.LoadComplete(r, collectionFactory)
|
||||
if err != nil {
|
||||
return err
|
||||
if err := collection.LoadComplete(r, collectionFactory); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
for _, component := range components {
|
||||
if utils.StrSliceHasItem(repoComponents, component) {
|
||||
packageList, err := NewPackageListFromRefList(r.RefList(component), collectionFactory.PackageCollection(), progress)
|
||||
unseenRefs := r.RefList(component)
|
||||
processedRefs := processedComponentRefs[component]
|
||||
if processedRefs != nil {
|
||||
unseenRefs = unseenRefs.Subtract(processedRefs)
|
||||
} else {
|
||||
processedRefs = NewPackageRefList()
|
||||
}
|
||||
|
||||
if unseenRefs.Len() == 0 {
|
||||
continue
|
||||
}
|
||||
processedComponentRefs[component] = processedRefs.Merge(unseenRefs, false, true)
|
||||
|
||||
packageList, err := NewPackageListFromRefList(unseenRefs, collectionFactory.PackageCollection(), progress)
|
||||
if err != nil {
|
||||
return err
|
||||
return nil, err
|
||||
}
|
||||
|
||||
packageList.ForEach(func(p *Package) error {
|
||||
@@ -1197,6 +1201,24 @@ func (collection *PublishedRepoCollection) CleanupPrefixComponentFiles(prefix st
|
||||
}
|
||||
}
|
||||
|
||||
return referencedFiles, nil
|
||||
}
|
||||
|
||||
// CleanupPrefixComponentFiles removes all unreferenced files in published storage under prefix/component pair
|
||||
func (collection *PublishedRepoCollection) CleanupPrefixComponentFiles(prefix string, components []string,
|
||||
publishedStorage aptly.PublishedStorage, collectionFactory *CollectionFactory, progress aptly.Progress) error {
|
||||
|
||||
collection.loadList()
|
||||
|
||||
if progress != nil {
|
||||
progress.Printf("Cleaning up prefix %#v components %s...\n", prefix, strings.Join(components, ", "))
|
||||
}
|
||||
|
||||
referencedFiles, err := collection.listReferencedFilesByComponent(prefix, components, collectionFactory, progress)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
for _, component := range components {
|
||||
sort.Strings(referencedFiles[component])
|
||||
|
||||
|
||||
113
deb/publish_bench_test.go
Normal file
113
deb/publish_bench_test.go
Normal file
@@ -0,0 +1,113 @@
|
||||
package deb
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"sort"
|
||||
"testing"
|
||||
|
||||
"github.com/aptly-dev/aptly/database/goleveldb"
|
||||
)
|
||||
|
||||
func BenchmarkListReferencedFiles(b *testing.B) {
|
||||
const defaultComponent = "main"
|
||||
const repoCount = 16
|
||||
const repoPackagesCount = 1024
|
||||
const uniqPackagesCount = 64
|
||||
|
||||
tmpDir, err := os.MkdirTemp("", "aptly-bench")
|
||||
if err != nil {
|
||||
b.Fatal(err)
|
||||
}
|
||||
defer os.RemoveAll(tmpDir)
|
||||
|
||||
db, err := goleveldb.NewOpenDB(tmpDir)
|
||||
if err != nil {
|
||||
b.Fatal(err)
|
||||
}
|
||||
defer db.Close()
|
||||
|
||||
factory := NewCollectionFactory(db)
|
||||
packageCollection := factory.PackageCollection()
|
||||
repoCollection := factory.LocalRepoCollection()
|
||||
publishCollection := factory.PublishedRepoCollection()
|
||||
|
||||
sharedRefs := NewPackageRefList()
|
||||
{
|
||||
transaction, err := db.OpenTransaction()
|
||||
if err != nil {
|
||||
b.Fatal(err)
|
||||
}
|
||||
|
||||
for pkgIndex := 0; pkgIndex < repoPackagesCount-uniqPackagesCount; pkgIndex++ {
|
||||
p := &Package{
|
||||
Name: fmt.Sprintf("pkg-shared_%d", pkgIndex),
|
||||
Version: "1",
|
||||
Architecture: "amd64",
|
||||
}
|
||||
p.UpdateFiles(PackageFiles{PackageFile{
|
||||
Filename: fmt.Sprintf("pkg-shared_%d.deb", pkgIndex),
|
||||
}})
|
||||
|
||||
packageCollection.UpdateInTransaction(p, transaction)
|
||||
sharedRefs.Refs = append(sharedRefs.Refs, p.Key(""))
|
||||
}
|
||||
|
||||
sort.Sort(sharedRefs)
|
||||
|
||||
if err := transaction.Commit(); err != nil {
|
||||
b.Fatal(err)
|
||||
}
|
||||
}
|
||||
|
||||
for repoIndex := 0; repoIndex < repoCount; repoIndex++ {
|
||||
refs := NewPackageRefList()
|
||||
|
||||
transaction, err := db.OpenTransaction()
|
||||
if err != nil {
|
||||
b.Fatal(err)
|
||||
}
|
||||
|
||||
for pkgIndex := 0; pkgIndex < uniqPackagesCount; pkgIndex++ {
|
||||
p := &Package{
|
||||
Name: fmt.Sprintf("pkg%d_%d", repoIndex, pkgIndex),
|
||||
Version: "1",
|
||||
Architecture: "amd64",
|
||||
}
|
||||
p.UpdateFiles(PackageFiles{PackageFile{
|
||||
Filename: fmt.Sprintf("pkg%d_%d.deb", repoIndex, pkgIndex),
|
||||
}})
|
||||
|
||||
packageCollection.UpdateInTransaction(p, transaction)
|
||||
refs.Refs = append(refs.Refs, p.Key(""))
|
||||
}
|
||||
|
||||
if err := transaction.Commit(); err != nil {
|
||||
b.Fatal(err)
|
||||
}
|
||||
|
||||
sort.Sort(refs)
|
||||
|
||||
repo := NewLocalRepo(fmt.Sprintf("repo%d", repoIndex), "comment")
|
||||
repo.DefaultDistribution = fmt.Sprintf("dist%d", repoIndex)
|
||||
repo.DefaultComponent = defaultComponent
|
||||
repo.UpdateRefList(refs.Merge(sharedRefs, false, true))
|
||||
repoCollection.Add(repo)
|
||||
|
||||
publish, err := NewPublishedRepo("", "test", "", nil, []string{defaultComponent}, []interface{}{repo}, factory)
|
||||
if err != nil {
|
||||
b.Fatal(err)
|
||||
}
|
||||
publishCollection.Add(publish)
|
||||
}
|
||||
|
||||
db.CompactDB()
|
||||
|
||||
b.ResetTimer()
|
||||
for i := 0; i < b.N; i++ {
|
||||
_, err := publishCollection.listReferencedFilesByComponent("test", []string{defaultComponent}, factory, nil)
|
||||
if err != nil {
|
||||
b.Fatal(err)
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -7,6 +7,7 @@ import (
|
||||
"io/ioutil"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"sort"
|
||||
|
||||
"github.com/aptly-dev/aptly/aptly"
|
||||
"github.com/aptly-dev/aptly/database"
|
||||
@@ -450,13 +451,22 @@ type PublishedRepoCollectionSuite struct {
|
||||
var _ = Suite(&PublishedRepoCollectionSuite{})
|
||||
|
||||
func (s *PublishedRepoCollectionSuite) SetUpTest(c *C) {
|
||||
s.SetUpPackages()
|
||||
|
||||
s.db, _ = goleveldb.NewOpenDB(c.MkDir())
|
||||
s.factory = NewCollectionFactory(s.db)
|
||||
|
||||
s.snapshotCollection = s.factory.SnapshotCollection()
|
||||
|
||||
s.snap1 = NewSnapshotFromPackageList("snap1", []*Snapshot{}, NewPackageList(), "desc1")
|
||||
s.snap2 = NewSnapshotFromPackageList("snap2", []*Snapshot{}, NewPackageList(), "desc2")
|
||||
snap1Refs := NewPackageRefList()
|
||||
snap1Refs.Refs = [][]byte{s.p1.Key(""), s.p2.Key("")}
|
||||
sort.Sort(snap1Refs)
|
||||
s.snap1 = NewSnapshotFromRefList("snap1", []*Snapshot{}, snap1Refs, "desc1")
|
||||
|
||||
snap2Refs := NewPackageRefList()
|
||||
snap2Refs.Refs = [][]byte{s.p3.Key("")}
|
||||
sort.Sort(snap2Refs)
|
||||
s.snap2 = NewSnapshotFromRefList("snap2", []*Snapshot{}, snap2Refs, "desc2")
|
||||
|
||||
s.snapshotCollection.Add(s.snap1)
|
||||
s.snapshotCollection.Add(s.snap2)
|
||||
@@ -534,7 +544,7 @@ func (s *PublishedRepoCollectionSuite) TestUpdateLoadComplete(c *C) {
|
||||
c.Assert(r.sourceItems["main"].snapshot, IsNil)
|
||||
c.Assert(s.collection.LoadComplete(r, s.factory), IsNil)
|
||||
c.Assert(r.Sources["main"], Equals, s.repo1.sourceItems["main"].snapshot.UUID)
|
||||
c.Assert(r.RefList("main").Len(), Equals, 0)
|
||||
c.Assert(r.RefList("main").Len(), Equals, 2)
|
||||
|
||||
r, err = collection.ByStoragePrefixDistribution("", "ppa", "precise")
|
||||
c.Assert(err, IsNil)
|
||||
@@ -625,6 +635,51 @@ func (s *PublishedRepoCollectionSuite) TestByLocalRepo(c *C) {
|
||||
c.Check(s.collection.ByLocalRepo(s.localRepo), DeepEquals, []*PublishedRepo{s.repo4, s.repo5})
|
||||
}
|
||||
|
||||
func (s *PublishedRepoCollectionSuite) TestListReferencedFiles(c *C) {
|
||||
c.Check(s.factory.PackageCollection().Update(s.p1), IsNil)
|
||||
c.Check(s.factory.PackageCollection().Update(s.p2), IsNil)
|
||||
c.Check(s.factory.PackageCollection().Update(s.p3), IsNil)
|
||||
|
||||
c.Check(s.collection.Add(s.repo1), IsNil)
|
||||
c.Check(s.collection.Add(s.repo2), IsNil)
|
||||
c.Check(s.collection.Add(s.repo4), IsNil)
|
||||
c.Check(s.collection.Add(s.repo5), IsNil)
|
||||
|
||||
files, err := s.collection.listReferencedFilesByComponent(".", []string{"main", "contrib"}, s.factory, nil)
|
||||
c.Assert(err, IsNil)
|
||||
for _, v := range files {
|
||||
sort.Strings(v)
|
||||
}
|
||||
c.Check(files, DeepEquals, map[string][]string{
|
||||
"contrib": {
|
||||
"a/alien-arena/alien-arena-common_7.40-2_i386.deb",
|
||||
"a/alien-arena/mars-invaders_7.40-2_i386.deb",
|
||||
},
|
||||
"main": {"a/alien-arena/lonely-strangers_7.40-2_i386.deb"},
|
||||
})
|
||||
|
||||
snap3 := NewSnapshotFromRefList("snap3", []*Snapshot{}, s.snap2.RefList(), "desc3")
|
||||
s.snapshotCollection.Add(snap3)
|
||||
|
||||
// Ensure that adding a second publish point with matching files doesn't give duplicate results.
|
||||
repo3, err := NewPublishedRepo("", "", "anaconda-2", []string{}, []string{"main"}, []interface{}{snap3}, s.factory)
|
||||
c.Check(err, IsNil)
|
||||
c.Check(s.collection.Add(repo3), IsNil)
|
||||
|
||||
files, err = s.collection.listReferencedFilesByComponent(".", []string{"main", "contrib"}, s.factory, nil)
|
||||
c.Assert(err, IsNil)
|
||||
for _, v := range files {
|
||||
sort.Strings(v)
|
||||
}
|
||||
c.Check(files, DeepEquals, map[string][]string{
|
||||
"contrib": {
|
||||
"a/alien-arena/alien-arena-common_7.40-2_i386.deb",
|
||||
"a/alien-arena/mars-invaders_7.40-2_i386.deb",
|
||||
},
|
||||
"main": {"a/alien-arena/lonely-strangers_7.40-2_i386.deb"},
|
||||
})
|
||||
}
|
||||
|
||||
type PublishedRepoRemoveSuite struct {
|
||||
PackageListMixinSuite
|
||||
db database.Storage
|
||||
|
||||
@@ -61,9 +61,11 @@ func (s *PackageListMixinSuite) SetUpPackages() {
|
||||
s.p1 = NewPackageFromControlFile(packageStanza.Copy())
|
||||
stanza := packageStanza.Copy()
|
||||
stanza["Package"] = "mars-invaders"
|
||||
stanza["Filename"] = "pool/contrib/m/mars-invaders/mars-invaders_7.40-2_i386.deb"
|
||||
s.p2 = NewPackageFromControlFile(stanza)
|
||||
stanza = packageStanza.Copy()
|
||||
stanza["Package"] = "lonely-strangers"
|
||||
stanza["Filename"] = "pool/contrib/l/lonely-strangers/lonely-strangers_7.40-2_i386.deb"
|
||||
s.p3 = NewPackageFromControlFile(stanza)
|
||||
|
||||
s.list.Add(s.p1)
|
||||
|
||||
Reference in New Issue
Block a user