From b4efe6a8101d5965e66ed58dae9cb5b6cfedcc39 Mon Sep 17 00:00:00 2001
From: Oliver Sauder
Date: Wed, 20 Jun 2018 15:04:02 +0200
Subject: [PATCH] Add db cleanup api

---

Notes:
    Review commentary only. Text in this section sits after the three-dash
    separator, so it is neither part of the commit message nor of any hunk.

    api/db.go adds the handler apiDbCleanup for POST /api/db/cleanup. It
    starts a background task named "Clean up db" that requests
    task.AllResourcesKey, answers 409 when runTaskInBackground reports a
    conflict and 202 with the task otherwise. The task body
      1. merges the package reference lists of all mirrors, local repos and
         snapshots, plus those of published repos whose SourceKind is
         deb.SourceLocalRepo (other published repos are skipped);
      2. deletes, in a single batch, every package key that is returned by
         AllPackageRefs() but is missing from that merged list;
      3. removes every package pool file that none of the still-referenced
         packages lists, storing the updated task detail after each removal
         and printing the total size freed;
      4. compacts the database.

    api/router.go registers the handler on the API root group.

    task/resources.go adds AllResourcesKey ("__all__"). In UsedBy, a request
    for that key collects every task in the set and then breaks out of the
    enclosing loop (the loop header is outside the hunk). Every call also
    collects the task registered under AllResourcesKey, if one is found.

    Points to confirm before merging:
      - The value returned by toDelete.ForEach is discarded, and so is
        whatever DeleteByKey returns (its signature is not visible here);
        a failed delete would only surface, if at all, in FinishBatch.
      - "tail" is always the empty string, so the trailing %s in the
        "unable to load package" message adds nothing.
      - The task detail fields are named ...NumberOfPackagesToDelete but are
        initialised from len(filesToDelete), i.e. they count pool files.
      - The handler replies 202 while the system test expects status 200 from
        post_task; presumably post_task waits for the task and returns a
        later response - verify against api_lib.

 api/db.go            | 186 +++++++++++++++++++++++++++++++++++++++++++
 api/router.go        |   3 +
 system/t12_api/db.py |  14 ++++
 task/resources.go    |  13 +++
 4 files changed, 216 insertions(+)
 create mode 100644 api/db.go
 create mode 100644 system/t12_api/db.py

diff --git a/api/db.go b/api/db.go
new file mode 100644
index 00000000..4ad45b14
--- /dev/null
+++ b/api/db.go
@@ -0,0 +1,186 @@
+package api
+
+import (
+	"fmt"
+	"sort"
+
+	"github.com/aptly-dev/aptly/deb"
+	"github.com/aptly-dev/aptly/task"
+	"github.com/aptly-dev/aptly/utils"
+	"github.com/gin-gonic/gin"
+)
+
+// POST /api/db/cleanup
+func apiDbCleanup(c *gin.Context) {
+
+	resources := []string{string(task.AllResourcesKey)}
+	currTask, conflictErr := runTaskInBackground("Clean up db", resources, func(out *task.Output, detail *task.Detail) error {
+		var err error
+
+		collectionFactory := context.NewCollectionFactory()
+
+		// collect information about referenced packages...
+		existingPackageRefs := deb.NewPackageRefList()
+
+		out.Printf("Loading mirrors, local repos, snapshots and published repos...")
+		err = collectionFactory.RemoteRepoCollection().ForEach(func(repo *deb.RemoteRepo) error {
+			e := collectionFactory.RemoteRepoCollection().LoadComplete(repo)
+			if e != nil {
+				return e
+			}
+			if repo.RefList() != nil {
+				existingPackageRefs = existingPackageRefs.Merge(repo.RefList(), false, true)
+			}
+
+			return nil
+		})
+		if err != nil {
+			return err
+		}
+
+		err = collectionFactory.LocalRepoCollection().ForEach(func(repo *deb.LocalRepo) error {
+			e := collectionFactory.LocalRepoCollection().LoadComplete(repo)
+			if e != nil {
+				return e
+			}
+
+			if repo.RefList() != nil {
+				existingPackageRefs = existingPackageRefs.Merge(repo.RefList(), false, true)
+			}
+
+			return nil
+		})
+		if err != nil {
+			return err
+		}
+
+		err = collectionFactory.SnapshotCollection().ForEach(func(snapshot *deb.Snapshot) error {
+			e := collectionFactory.SnapshotCollection().LoadComplete(snapshot)
+			if e != nil {
+				return e
+			}
+
+			existingPackageRefs = existingPackageRefs.Merge(snapshot.RefList(), false, true)
+
+			return nil
+		})
+		if err != nil {
+			return err
+		}
+
+		err = collectionFactory.PublishedRepoCollection().ForEach(func(published *deb.PublishedRepo) error {
+			if published.SourceKind != deb.SourceLocalRepo {
+				return nil
+			}
+			e := collectionFactory.PublishedRepoCollection().LoadComplete(published, collectionFactory)
+			if e != nil {
+				return e
+			}
+
+			for _, component := range published.Components() {
+				existingPackageRefs = existingPackageRefs.Merge(published.RefList(component), false, true)
+			}
+			return nil
+		})
+		if err != nil {
+			return err
+		}
+
+		// ... and compare it to the list of all packages
+		out.Printf("Loading list of all packages...")
+		allPackageRefs := collectionFactory.PackageCollection().AllPackageRefs()
+
+		toDelete := allPackageRefs.Subtract(existingPackageRefs)
+
+		// delete packages that are no longer referenced
+		out.Printf("Deleting unreferenced packages (%d)...", toDelete.Len())
+
+		// database can't err as collection factory already constructed
+		db, _ := context.Database()
+
+		if toDelete.Len() > 0 {
+			batch := db.StartBatch()
+			toDelete.ForEach(func(ref []byte) error {
+				collectionFactory.PackageCollection().DeleteByKey(ref, batch)
+				return nil
+			})
+
+			err = db.FinishBatch(batch)
+			if err != nil {
+				return fmt.Errorf("unable to write to DB: %s", err)
+			}
+		}
+
+		// now, build a list of files that should be present in Repository (package pool)
+		out.Printf("Building list of files referenced by packages...")
+		referencedFiles := make([]string, 0, existingPackageRefs.Len())
+
+		err = existingPackageRefs.ForEach(func(key []byte) error {
+			pkg, err2 := collectionFactory.PackageCollection().ByKey(key)
+			if err2 != nil {
+				tail := ""
+				return fmt.Errorf("unable to load package %s: %s%s", string(key), err2, tail)
+			}
+			paths, err2 := pkg.FilepathList(context.PackagePool())
+			if err2 != nil {
+				return err2
+			}
+			referencedFiles = append(referencedFiles, paths...)
+
+			return nil
+		})
+		if err != nil {
+			return err
+		}
+
+		sort.Strings(referencedFiles)
+
+		// build a list of files in the package pool
+		out.Printf("Building list of files in package pool...")
+		existingFiles, err := context.PackagePool().FilepathList(out)
+		if err != nil {
+			return fmt.Errorf("unable to collect file paths: %s", err)
+		}
+
+		// find files which are in the pool but not referenced by packages
+		filesToDelete := utils.StrSlicesSubstract(existingFiles, referencedFiles)
+
+		// delete files that are no longer referenced
+		out.Printf("Deleting unreferenced files (%d)...", len(filesToDelete))
+
+		countFilesToDelete := len(filesToDelete)
+		taskDetail := struct {
+			TotalNumberOfPackagesToDelete     int
+			RemainingNumberOfPackagesToDelete int
+		}{
+			countFilesToDelete, countFilesToDelete,
+		}
+		detail.Store(taskDetail)
+
+		if countFilesToDelete > 0 {
+			var size, totalSize int64
+			for _, file := range filesToDelete {
+				size, err = context.PackagePool().Remove(file)
+				if err != nil {
+					return err
+				}
+
+				taskDetail.RemainingNumberOfPackagesToDelete--
+				detail.Store(taskDetail)
+				totalSize += size
+			}
+
+			out.Printf("Disk space freed: %s...", utils.HumanBytes(totalSize))
+		}
+
+		out.Printf("Compacting database...")
+		return db.CompactDB()
+	})
+
+	if conflictErr != nil {
+		c.AbortWithError(409, conflictErr)
+		return
+	}
+
+	c.JSON(202, currTask)
+}
diff --git a/api/router.go b/api/router.go
index 3d0ba506..bb94bdd8 100644
--- a/api/router.go
+++ b/api/router.go
@@ -124,6 +124,9 @@ func Router(c *ctx.AptlyContext) http.Handler {
 	{
 		root.GET("/graph.:ext", apiGraph)
 	}
+	{
+		root.POST("/db/cleanup", apiDbCleanup)
+	}
 	{
 		root.GET("/tasks", apiTasksList)
 		root.POST("/tasks-clear", apiTasksClear)
diff --git a/system/t12_api/db.py b/system/t12_api/db.py
new file mode 100644
index 00000000..0542fb33
--- /dev/null
+++ b/system/t12_api/db.py
@@ -0,0 +1,14 @@
+from api_lib import APITest
+
+
+class DbAPITestCleanup(APITest):
+    """
+    POST /db/cleanup
+    """
+
+    def check(self):
+        resp = self.post_task(
+            "/api/db/cleanup"
+        )
+
+        self.check_equal(resp.status_code, 200)
diff --git a/task/resources.go b/task/resources.go
index 703111d6..296a7a19 100644
--- a/task/resources.go
+++ b/task/resources.go
@@ -7,6 +7,9 @@ import (
 // AllLocalReposResourcesKey to be used as resource key when all local repos are needed
 const AllLocalReposResourcesKey = "__alllocalrepos__"
 
+// AllResourcesKey to be used as resource key when all resources are needed
+const AllResourcesKey = "__all__"
+
 // ResourceConflictError represents a list tasks
 // using conflicitng resources
 type ResourceConflictError struct {
@@ -51,6 +54,12 @@ func (r *ResourcesSet) UsedBy(resources []string) []Task {
 					tasks = appendTask(tasks, task)
 				}
 			}
+		} else if resource == AllResourcesKey {
+			for _, task := range r.set {
+				tasks = appendTask(tasks, task)
+			}
+
+			break
 		}
 
 		task, found = r.set[resource]
@@ -63,6 +72,10 @@ func (r *ResourcesSet) UsedBy(resources []string) []Task {
 	if found {
 		tasks = appendTask(tasks, task)
 	}
+	task, found = r.set[AllResourcesKey]
+	if found {
+		tasks = appendTask(tasks, task)
+	}
 
 	return tasks
 }