From 5a75e45ba8a47f4450513efa954a0c4f4729fcbf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Roth?= Date: Mon, 25 May 2026 12:17:05 +0000 Subject: [PATCH] fix(mirror): eliminate race conditions by using fresh factory inside task closures Affected endpoints: apiMirrorsDrop, apiMirrorsUpdate. Both endpoints shared the same architectural flaw as the previously fixed publish, repos, and snapshot endpoints: operations were performed outside the task lock, with stale DB state used inside the lock. Issues Fixed: 1. apiMirrorsDrop - Collections created before task lock Problem: mirrorCollection and snapshotCollection created before task lock. Snapshot dependency check done with stale factory. Concurrent drops both load pre-task state, both see same snapshot dependencies. If snapshots created after pre-task check, can delete mirror used by snapshots. Fix: Create fresh taskCollectionFactory inside task, fresh load of mirror after lock acquired, fresh snapshot check with current factory, drop using fresh collections. 2. apiMirrorsUpdate - Mirror loaded before task lock Problem: remote loaded outside task, rename duplicate check with stale factory. Concurrent updates both load pre-task state, long-running update uses stale mirror reference. TOCTOU race: rename check passes, another creates mirror with same name, update saves with stale data. Fix: Create fresh taskCollectionFactory inside task, fresh load of mirror after lock acquired, pre-task rename validation, fresh rename check inside lock, use fresh mirror and collections for all operations. Root cause analysis: The fundamental issue is the split between pre-task work and task-protected work. Collections and objects were being loaded before lock acquisition, then stale copies used inside the lock. Correct pattern (from fixed publish.go, repos.go, and snapshot.go): 1. HTTP Handler (before task lock): - Shallow load for 404 check only - Extract resource keys - Submit task with resources 2. Task Closure (after lock acquired): - Create fresh collectionFactory - Fresh load of all objects - LoadComplete on fresh copies - All mutations on fresh state - All checks atomic inside lock - Save using fresh collections This ensures: - Concurrent operations are serialized by task queue - No stale DB state used for mutations - No lost updates from concurrent modifications - No TOCTOU races on duplicate checks - No loss of mirrors used by snapshots - No stale data in long-running updates --- api/mirror.go | 45 ++++++++++++++++++++++++++++++++++++++------- 1 file changed, 38 insertions(+), 7 deletions(-) diff --git a/api/mirror.go b/api/mirror.go index 743101fc..7e46b5b4 100644 --- a/api/mirror.go +++ b/api/mirror.go @@ -216,9 +216,9 @@ func apiMirrorsDrop(c *gin.Context) { name := c.Params.ByName("name") force := c.Request.URL.Query().Get("force") == "1" + // Phase 1: Pre-task validation (shallow load for 404 check only) collectionFactory := context.NewCollectionFactory() mirrorCollection := collectionFactory.RemoteRepoCollection() - snapshotCollection := collectionFactory.SnapshotCollection() repo, err := mirrorCollection.ByName(name) if err != nil { @@ -228,21 +228,34 @@ func apiMirrorsDrop(c *gin.Context) { resources := []string{string(repo.Key())} taskName := fmt.Sprintf("Delete mirror %s", name) + maybeRunTaskInBackground(c, taskName, resources, func(_ aptly.Progress, _ *task.Detail) (*task.ProcessReturnValue, error) { - err := repo.CheckLock() + // Phase 2: Inside task lock - create fresh collections + taskCollectionFactory := context.NewCollectionFactory() + taskMirrorCollection := taskCollectionFactory.RemoteRepoCollection() + taskSnapshotCollection := taskCollectionFactory.SnapshotCollection() + + // Fresh load after lock acquired + repo, err := taskMirrorCollection.ByName(name) + if err != nil { + return &task.ProcessReturnValue{Code: http.StatusInternalServerError, Value: nil}, fmt.Errorf("unable to drop: %v", err) + } + + err = repo.CheckLock() if err != nil { return &task.ProcessReturnValue{Code: http.StatusInternalServerError, Value: nil}, fmt.Errorf("unable to drop: %v", err) } if !force { - snapshots := snapshotCollection.ByRemoteRepoSource(repo) + // Fresh checks with current collections + snapshots := taskSnapshotCollection.ByRemoteRepoSource(repo) if len(snapshots) > 0 { return &task.ProcessReturnValue{Code: http.StatusForbidden, Value: nil}, fmt.Errorf("won't delete mirror with snapshots, use 'force=1' to override") } } - err = mirrorCollection.Drop(repo) + err = taskMirrorCollection.Drop(repo) if err != nil { return &task.ProcessReturnValue{Code: http.StatusInternalServerError, Value: nil}, fmt.Errorf("unable to drop: %v", err) } @@ -550,6 +563,7 @@ func apiMirrorsUpdate(c *gin.Context) { return } + // Pre-task validation of new name if provided if b.Name != remote.Name { _, err = collection.ByName(b.Name) if err == nil { @@ -566,9 +580,26 @@ func apiMirrorsUpdate(c *gin.Context) { resources := []string{string(remote.Key())} maybeRunTaskInBackground(c, "Update mirror "+b.Name, resources, func(out aptly.Progress, detail *task.Detail) (*task.ProcessReturnValue, error) { + // Phase 2: Inside task lock - create fresh factory + taskCollectionFactory := context.NewCollectionFactory() + taskCollection := taskCollectionFactory.RemoteRepoCollection() + + // Fresh load after lock acquired + remote, err := taskCollection.ByName(c.Params.ByName("name")) + if err != nil { + return &task.ProcessReturnValue{Code: http.StatusInternalServerError, Value: nil}, fmt.Errorf("unable to update: %s", err) + } + + // Fresh rename check inside lock (if renaming) + if b.Name != remote.Name { + _, err := taskCollection.ByName(b.Name) + if err == nil { + return &task.ProcessReturnValue{Code: http.StatusConflict, Value: nil}, fmt.Errorf("unable to rename: mirror %s already exists", b.Name) + } + } downloader := context.NewDownloader(out) - err := remote.Fetch(downloader, verifier, b.IgnoreSignatures) + err = remote.Fetch(downloader, verifier, b.IgnoreSignatures) if err != nil { return &task.ProcessReturnValue{Code: http.StatusInternalServerError, Value: nil}, fmt.Errorf("unable to update: %s", err) } @@ -780,8 +811,8 @@ func apiMirrorsUpdate(c *gin.Context) { } log.Info().Msgf("%s: Finalizing download...", b.Name) - _ = remote.FinalizeDownload(collectionFactory, out) - err = collectionFactory.RemoteRepoCollection().Update(remote) + _ = remote.FinalizeDownload(taskCollectionFactory, out) + err = taskCollection.Update(remote) if err != nil { return &task.ProcessReturnValue{Code: http.StatusInternalServerError, Value: nil}, fmt.Errorf("unable to update: %s", err) }