Description: fix ETag issue with S3 backend The S3 backend relies on the ETag returned by S3 being equal to the MD5 of the object, but that is not necessarily true. For that reason we store the MD5 of the object in a separate metadata field as well to make sure it isn’t lost. When the value returned clearly doesn’t look like a valid MD5 hash (length isn’t exactly 32 characters), attempt to retrieve the MD5 hash possibly stored in the metadata. . We cannot always do this since user-defined metadata isn’t returned by the ListObjects call, so verifying it for each object is expensive as it requires one HEAD request per object. Author: Andrej Shadura Origin: upstream Bug: https://github.com/aptly-dev/aptly/issues/923 Bug-Debian: 983877 Applied-Upstream: commit:2422d3a Reviewed-by: Sebastien Delafond Last-Update: 2021-03-02 --- This patch header follows DEP-3: http://dep.debian.net/deps/dep3/ --- diff --git a/s3/public.go b/s3/public.go index a3e85a99..d6c161ad 100644 --- a/s3/public.go +++ b/s3/public.go @@ -142,7 +142,7 @@ func (storage *PublishedStorage) PutFile(path string, sourceFilename string) err } defer source.Close() - err = storage.putFile(path, source) + err = storage.putFile(path, source, "") if err != nil { err = errors.Wrap(err, fmt.Sprintf("error uploading %s to %s", sourceFilename, storage)) } @@ -150,8 +150,22 @@ func (storage *PublishedStorage) PutFile(path string, sourceFilename string) err return err } +// getMD5 retrieves MD5 stored in the metadata, if any +func (storage *PublishedStorage) getMD5(path string) (string, error) { + params := &s3.HeadObjectInput{ + Bucket: aws.String(storage.bucket), + Key: aws.String(filepath.Join(storage.prefix, path)), + } + output, err := storage.s3.HeadObject(params) + if err != nil { + return "", err + } + + return aws.StringValue(output.Metadata["Md5"]), nil +} + // putFile uploads file-like object to -func (storage *PublishedStorage) putFile(path string, source io.ReadSeeker) error { +func (storage *PublishedStorage) 
putFile(path string, source io.ReadSeeker, sourceMD5 string) error { params := &s3.PutObjectInput{ Bucket: aws.String(storage.bucket), @@ -165,6 +179,11 @@ func (storage *PublishedStorage) putFile(path string, source io.ReadSeeker) erro if storage.encryptionMethod != "" { params.ServerSideEncryption = aws.String(storage.encryptionMethod) } + if sourceMD5 != "" { + params.Metadata = map[string]*string{ + "Md5": aws.String(sourceMD5), + } + } _, err := storage.s3.PutObject(params) if err != nil { @@ -177,7 +196,7 @@ func (storage *PublishedStorage) putFile(path string, source io.ReadSeeker) erro return err } - return storage.putFile(strings.Replace(path, "+", " ", -1), source) + return storage.putFile(strings.Replace(path, "+", " ", -1), source, sourceMD5) } return nil } @@ -298,6 +317,17 @@ func (storage *PublishedStorage) LinkFromPool(publishedDirectory, fileName strin sourceMD5 := sourceChecksums.MD5 if exists { + if len(destinationMD5) != 32 { + // doesn’t look like a valid MD5, + // attempt to fetch one from the metadata + var err error + destinationMD5, err = storage.getMD5(relPath) + if err != nil { + err = errors.Wrap(err, fmt.Sprintf("error verifying MD5 for %s: %s", storage, poolPath)) + return err + } + storage.pathCache[relPath] = destinationMD5 + } if sourceMD5 == "" { return fmt.Errorf("unable to compare object, MD5 checksum missing") } @@ -318,7 +348,7 @@ func (storage *PublishedStorage) LinkFromPool(publishedDirectory, fileName strin } defer source.Close() - err = storage.putFile(relPath, source) + err = storage.putFile(relPath, source, sourceMD5) if err == nil { storage.pathCache[relPath] = sourceMD5 } else {