publish: fix race conditions

* remove useless resource lock
  Resource locks need to be before the background task. creating same publish endpoint at the same time is unlikely...
* load data inside background tasks
  This fixes a flaw in async apis, which loaded the published repo from the DB and mutated it outside the task closure, before the task lock was acquired.
  Perform collection.LoadComplete inside maybeRunTaskInBackground and have tasks use a fresh copy of taskCollectionFactory, taskCollection
* lock source repos/snapshots for publish operations
  Concurrent tasks were not properly locking their resources, leading to inconsistent published indexes:
  SourceLocalRepo: iterate published.Sources (component -> source UUID), look up each local repo via localRepoCollection.ByUUID and append string(repo.Key()) to resources
  SourceSnapshot: iterate b.Snapshots,look up each snapshot via snapshotCollection.ByName and append string(snapshot.ResourceKey()) to resources.
* lock pool on non MultiDist publish
* revert mutex on LinkFromPool
* use uuids, since names can be renamed
This commit is contained in:
André Roth
2026-05-20 13:40:37 +00:00
parent 2974558aa7
commit 80e4e9bdac
8 changed files with 1294 additions and 519 deletions
-283
View File
@@ -1,283 +0,0 @@
package files
import (
"fmt"
"os"
"path/filepath"
"sync"
"time"
"github.com/aptly-dev/aptly/aptly"
"github.com/aptly-dev/aptly/utils"
. "gopkg.in/check.v1"
)
type LinkFromPoolConcurrencySuite struct {
root string
poolDir string
storage *PublishedStorage
pool *PackagePool
cs aptly.ChecksumStorage
testFile string
testContent []byte
testChecksums utils.ChecksumInfo
srcPoolPath string
}
var _ = Suite(&LinkFromPoolConcurrencySuite{})
func (s *LinkFromPoolConcurrencySuite) SetUpTest(c *C) {
s.root = c.MkDir()
s.poolDir = filepath.Join(s.root, "pool")
publishDir := filepath.Join(s.root, "public")
// Create package pool and published storage
s.pool = NewPackagePool(s.poolDir, true)
s.storage = NewPublishedStorage(publishDir, "copy", "checksum")
s.cs = NewMockChecksumStorage()
// Create test file content
s.testContent = []byte("test package content for concurrency testing")
s.testFile = filepath.Join(s.root, "test-package.deb")
err := os.WriteFile(s.testFile, s.testContent, 0644)
c.Assert(err, IsNil)
// Calculate checksums
md5sum, err := utils.MD5ChecksumForFile(s.testFile)
c.Assert(err, IsNil)
s.testChecksums = utils.ChecksumInfo{
Size: int64(len(s.testContent)),
MD5: md5sum,
}
// Import the test file into the pool
s.srcPoolPath, err = s.pool.Import(s.testFile, "test-package.deb", &s.testChecksums, false, s.cs)
c.Assert(err, IsNil)
}
func (s *LinkFromPoolConcurrencySuite) TestLinkFromPoolConcurrency(c *C) {
// Test concurrent LinkFromPool operations to ensure no race conditions
concurrency := 5000
iterations := 10
for iter := 0; iter < iterations; iter++ {
c.Logf("Iteration %d: Testing concurrent LinkFromPool with %d goroutines", iter+1, concurrency)
destPath := fmt.Sprintf("main/t/test%d", iter)
var wg sync.WaitGroup
errors := make(chan error, concurrency)
successes := make(chan struct{}, concurrency)
start := time.Now()
// Launch concurrent LinkFromPool operations
for i := 0; i < concurrency; i++ {
wg.Add(1)
go func(id int) {
defer wg.Done()
// Use force=true to test the most vulnerable code path (remove-then-create)
err := s.storage.LinkFromPool(
"", // publishedPrefix
destPath, // publishedRelPath
"test-package.deb", // fileName
s.pool, // sourcePool
s.srcPoolPath, // sourcePath
s.testChecksums, // sourceChecksums
true, // force - this triggers vulnerable remove-then-create pattern
)
if err != nil {
errors <- fmt.Errorf("goroutine %d failed: %v", id, err)
} else {
successes <- struct{}{}
}
}(i)
}
// Wait for completion
wg.Wait()
duration := time.Since(start)
close(errors)
close(successes)
// Count results
errorCount := 0
successCount := 0
var firstError error
for err := range errors {
errorCount++
if firstError == nil {
firstError = err
}
c.Logf("Race condition error: %v", err)
}
for range successes {
successCount++
}
c.Logf("Results: %d successes, %d errors, took %v", successCount, errorCount, duration)
// Assert no race conditions occurred
if errorCount > 0 {
c.Fatalf("Race condition detected in iteration %d! "+
"Errors: %d out of %d operations (%.1f%% failure rate). "+
"First error: %v. "+
"This indicates the fix is not working properly.",
iter+1, errorCount, concurrency,
float64(errorCount)/float64(concurrency)*100, firstError)
}
// Verify the final file exists and has correct content
finalFile := filepath.Join(s.storage.rootPath, destPath, "test-package.deb")
_, err := os.Stat(finalFile)
c.Assert(err, IsNil, Commentf("Final file should exist after concurrent operations"))
content, err := os.ReadFile(finalFile)
c.Assert(err, IsNil, Commentf("Should be able to read final file"))
c.Assert(content, DeepEquals, s.testContent, Commentf("File content should be intact after concurrent operations"))
c.Logf("✓ Iteration %d: No race conditions detected", iter+1)
}
c.Logf("SUCCESS: Handled %d total concurrent operations across %d iterations with no race conditions",
concurrency*iterations, iterations)
}
func (s *LinkFromPoolConcurrencySuite) TestLinkFromPoolConcurrencyDifferentFiles(c *C) {
// Test concurrent operations on different files to ensure no blocking
concurrency := 10
var wg sync.WaitGroup
errors := make(chan error, concurrency)
start := time.Now()
// Launch concurrent operations on different destination files
for i := 0; i < concurrency; i++ {
wg.Add(1)
go func(id int) {
defer wg.Done()
destPath := fmt.Sprintf("main/t/test-file-%d", id)
err := s.storage.LinkFromPool(
"", // publishedPrefix
destPath, // publishedRelPath
"test-package.deb", // fileName
s.pool, // sourcePool
s.srcPoolPath, // sourcePath
s.testChecksums, // sourceChecksums
false, // force
)
if err != nil {
errors <- fmt.Errorf("goroutine %d failed: %v", id, err)
}
}(i)
}
// Wait for completion
wg.Wait()
duration := time.Since(start)
close(errors)
// Count errors
errorCount := 0
for err := range errors {
errorCount++
c.Logf("Error: %v", err)
}
c.Assert(errorCount, Equals, 0, Commentf("No errors should occur when linking to different files"))
c.Logf("SUCCESS: %d concurrent operations on different files completed in %v", concurrency, duration)
// Verify all files were created correctly
for i := 0; i < concurrency; i++ {
finalFile := filepath.Join(s.storage.rootPath, fmt.Sprintf("main/t/test-file-%d", i), "test-package.deb")
_, err := os.Stat(finalFile)
c.Assert(err, IsNil, Commentf("File %d should exist", i))
content, err := os.ReadFile(finalFile)
c.Assert(err, IsNil, Commentf("Should be able to read file %d", i))
c.Assert(content, DeepEquals, s.testContent, Commentf("File %d content should be correct", i))
}
}
func (s *LinkFromPoolConcurrencySuite) TestLinkFromPoolWithoutForceNoConcurrencyIssues(c *C) {
// Test that when force=false, concurrent operations fail gracefully without corruption
concurrency := 20
destPath := "main/t/single-dest"
var wg sync.WaitGroup
errors := make(chan error, concurrency)
successes := make(chan struct{}, concurrency)
// First, create the file so subsequent operations will conflict
err := s.storage.LinkFromPool("", destPath, "test-package.deb", s.pool, s.srcPoolPath, s.testChecksums, false)
c.Assert(err, IsNil)
start := time.Now()
// Launch concurrent operations that should mostly fail
for i := 0; i < concurrency; i++ {
wg.Add(1)
go func(id int) {
defer wg.Done()
err := s.storage.LinkFromPool(
"", // publishedPrefix
destPath, // publishedRelPath
"test-package.deb", // fileName
s.pool, // sourcePool
s.srcPoolPath, // sourcePath
s.testChecksums, // sourceChecksums
false, // force=false - should fail if file exists and is same
)
if err != nil {
errors <- err
} else {
successes <- struct{}{}
}
}(i)
}
// Wait for completion
wg.Wait()
duration := time.Since(start)
close(errors)
close(successes)
errorCount := 0
successCount := 0
for range errors {
errorCount++
}
for range successes {
successCount++
}
c.Logf("Results with force=false: %d successes, %d errors, took %v", successCount, errorCount, duration)
// With force=false and identical files, operations should succeed (file already exists with same content)
// No race conditions should cause crashes or corruption
c.Assert(errorCount, Equals, 0, Commentf("With identical files and force=false, operations should succeed"))
// Verify the file still exists and has correct content
finalFile := filepath.Join(s.storage.rootPath, destPath, "test-package.deb")
content, err := os.ReadFile(finalFile)
c.Assert(err, IsNil)
c.Assert(content, DeepEquals, s.testContent, Commentf("File should not be corrupted by concurrent access"))
}
-25
View File
@@ -26,26 +26,6 @@ type PublishedStorage struct {
verifyMethod uint
}
// Global mutex map to prevent concurrent access to the same destinationPath in LinkFromPool
var (
fileLockMutex sync.Mutex
fileLocks = make(map[string]*sync.Mutex)
)
// getFileLock returns a mutex for a specific file path to prevent concurrent modifications
func getFileLock(filePath string) *sync.Mutex {
fileLockMutex.Lock()
defer fileLockMutex.Unlock()
if mutex, exists := fileLocks[filePath]; exists {
return mutex
}
mutex := &sync.Mutex{}
fileLocks[filePath] = mutex
return mutex
}
// Check interfaces
var (
_ aptly.PublishedStorage = (*PublishedStorage)(nil)
@@ -172,11 +152,6 @@ func (storage *PublishedStorage) LinkFromPool(publishedPrefix, publishedRelPath,
poolPath := filepath.Join(storage.rootPath, publishedPrefix, publishedRelPath, filepath.Dir(fileName))
destinationPath := filepath.Join(poolPath, baseName)
// Acquire file-specific lock to prevent concurrent access to the same file
fileLock := getFileLock(destinationPath)
fileLock.Lock()
defer fileLock.Unlock()
var localSourcePool aptly.LocalPackagePool
if storage.linkMethod != LinkMethodCopy {
pp, ok := sourcePool.(aptly.LocalPackagePool)
-10
View File
@@ -632,16 +632,6 @@ func (s *DiskFullNoRootSuite) TestLinkFromPoolCopySyncErrorIsReturned(c *C) {
c.Check(strings.Contains(err.Error(), "error syncing file"), Equals, true)
}
func (s *DiskFullNoRootSuite) TestGetFileLockReusesMutex(c *C) {
a := getFileLock(filepath.Join(s.root, "a"))
b := getFileLock(filepath.Join(s.root, "a"))
c.Check(a == b, Equals, true)
c1 := getFileLock(filepath.Join(s.root, "c1"))
c2 := getFileLock(filepath.Join(s.root, "c2"))
c.Check(c1 == c2, Equals, false)
}
func (s *DiskFullNoRootSuite) TestPutFileFailsIfDestinationDirMissing(c *C) {
storage := NewPublishedStorage(s.root, "", "")