Files
aptly/deb/publish_bench_test.go
Ryan Gonzalez 8cb1236a8c Improve publish cleanup perf when sources share most of their packages
The cleanup phase needs to list out all the files in each component in
order to determine what's still in use. When there's a large number of
sources (e.g. from having many snapshots), the time spent just loading
the package information becomes substantial. However, in many cases,
most of the packages being loaded are actually shared across the
sources; if you're taking frequent snapshots, for instance, most of the
packages in each snapshot will be the same as other snapshots. In these
cases, re-reading the packages repeatedly is just a waste of time.

To improve this, we maintain a list of refs that we know were processed
for each component. When listing the refs from a source, only the ones
that have not yet been processed will be examined. Some tests were also
added specifically to check listing the files in a component.

With this change, listing the files in components on a copy of our
production database went from >10 minutes to ~10 seconds, and the newly
added benchmark went from ~300ms to ~43ms.

Signed-off-by: Ryan Gonzalez <ryan.gonzalez@collabora.com>
2024-04-24 16:46:16 +02:00

114 lines
2.6 KiB
Go

package deb
import (
"fmt"
"os"
"sort"
"testing"
"github.com/aptly-dev/aptly/database/goleveldb"
)
// BenchmarkListReferencedFiles measures listReferencedFilesByComponent over
// several published local repos whose package lists overlap heavily.
//
// Setup creates repoCount local repos. Each repo's ref list is built from
// uniqPackagesCount packages created only for that repo, merged with a common
// list of repoPackagesCount-uniqPackagesCount packages created once up front.
// Every repo is published under the "test" prefix with the single component
// "main". Only the listReferencedFilesByComponent calls are timed.
func BenchmarkListReferencedFiles(b *testing.B) {
	const defaultComponent = "main"
	const repoCount = 16
	const repoPackagesCount = 1024
	const uniqPackagesCount = 64

	tmpDir, err := os.MkdirTemp("", "aptly-bench")
	if err != nil {
		b.Fatal(err)
	}
	defer os.RemoveAll(tmpDir)

	db, err := goleveldb.NewOpenDB(tmpDir)
	if err != nil {
		b.Fatal(err)
	}
	defer db.Close()

	factory := NewCollectionFactory(db)
	packageCollection := factory.PackageCollection()
	repoCollection := factory.LocalRepoCollection()
	publishCollection := factory.PublishedRepoCollection()

	// Packages whose refs are merged into every repo's ref list.
	sharedRefs := NewPackageRefList()
	{
		transaction, err := db.OpenTransaction()
		if err != nil {
			b.Fatal(err)
		}

		for pkgIndex := 0; pkgIndex < repoPackagesCount-uniqPackagesCount; pkgIndex++ {
			p := &Package{
				Name:         fmt.Sprintf("pkg-shared_%d", pkgIndex),
				Version:      "1",
				Architecture: "amd64",
			}
			p.UpdateFiles(PackageFiles{PackageFile{
				Filename: fmt.Sprintf("pkg-shared_%d.deb", pkgIndex),
			}})

			// A failed write would leave the fixture incomplete and the
			// timings meaningless, so abort instead of continuing.
			if err := packageCollection.UpdateInTransaction(p, transaction); err != nil {
				transaction.Discard()
				b.Fatal(err)
			}
			sharedRefs.Refs = append(sharedRefs.Refs, p.Key(""))
		}

		// Refs were appended in generation order; sort them before the list
		// is merged into the per-repo lists below.
		sort.Sort(sharedRefs)

		if err := transaction.Commit(); err != nil {
			b.Fatal(err)
		}
	}

	for repoIndex := 0; repoIndex < repoCount; repoIndex++ {
		// Packages created for this repo only.
		refs := NewPackageRefList()

		transaction, err := db.OpenTransaction()
		if err != nil {
			b.Fatal(err)
		}

		for pkgIndex := 0; pkgIndex < uniqPackagesCount; pkgIndex++ {
			p := &Package{
				Name:         fmt.Sprintf("pkg%d_%d", repoIndex, pkgIndex),
				Version:      "1",
				Architecture: "amd64",
			}
			p.UpdateFiles(PackageFiles{PackageFile{
				Filename: fmt.Sprintf("pkg%d_%d.deb", repoIndex, pkgIndex),
			}})

			if err := packageCollection.UpdateInTransaction(p, transaction); err != nil {
				transaction.Discard()
				b.Fatal(err)
			}
			refs.Refs = append(refs.Refs, p.Key(""))
		}

		sort.Sort(refs)

		if err := transaction.Commit(); err != nil {
			b.Fatal(err)
		}

		repo := NewLocalRepo(fmt.Sprintf("repo%d", repoIndex), "comment")
		repo.DefaultDistribution = fmt.Sprintf("dist%d", repoIndex)
		repo.DefaultComponent = defaultComponent
		repo.UpdateRefList(refs.Merge(sharedRefs, false, true))
		if err := repoCollection.Add(repo); err != nil {
			b.Fatal(err)
		}

		publish, err := NewPublishedRepo("", "test", "", nil, []string{defaultComponent}, []interface{}{repo}, factory)
		if err != nil {
			b.Fatal(err)
		}
		if err := publishCollection.Add(publish); err != nil {
			b.Fatal(err)
		}
	}

	// Compaction runs before the timer is reset, so its cost is not measured.
	if err := db.CompactDB(); err != nil {
		b.Fatal(err)
	}

	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		_, err := publishCollection.listReferencedFilesByComponent("test", []string{defaultComponent}, factory, nil)
		if err != nil {
			b.Fatal(err)
		}
	}
}