When ETag doesn’t look like MD5, use the value from metadata instead

The S3 backend relies on the ETag returned by S3 being equal to the MD5
of the object, but that is not necessarily true. When the returned value
clearly doesn’t look like a valid MD5 hash (its length isn’t exactly 32
characters), attempt to retrieve the MD5 hash possibly stored in the
metadata.

We cannot always do this, since user-defined metadata isn’t returned by
the ListObjects call, so verifying it for every object is expensive: it
requires one HEAD request per object.

This commit fixes #923.

Signed-off-by: Andrej Shadura <andrew.shadura@collabora.co.uk>
This commit is contained in:
Andrej Shadura
2020-07-22 19:40:23 +02:00
committed by Lorenzo Bolla
parent 960cf76c42
commit 2422d3ab40

View File

@@ -150,6 +150,20 @@ func (storage *PublishedStorage) PutFile(path string, sourceFilename string) err
return err
}
// getMD5 looks up the MD5 checksum recorded in an object's metadata.
//
// It sends a HeadObject request for path (joined onto the storage prefix)
// in the configured bucket and returns whatever is stored under the "Md5"
// metadata key, converted with aws.StringValue. A failed request yields an
// empty string together with the unmodified error.
func (storage *PublishedStorage) getMD5(path string) (string, error) {
	objectKey := filepath.Join(storage.prefix, path)

	head, err := storage.s3.HeadObject(&s3.HeadObjectInput{
		Bucket: aws.String(storage.bucket),
		Key:    aws.String(objectKey),
	})
	if err != nil {
		return "", err
	}

	// NOTE(review): presumably a missing "Md5" entry comes back as "" with a
	// nil error (nil pointer through aws.StringValue) — confirm against the SDK.
	stored := head.Metadata["Md5"]
	return aws.StringValue(stored), nil
}
// putFile uploads file-like object to
func (storage *PublishedStorage) putFile(path string, source io.ReadSeeker, sourceMD5 string) error {
@@ -303,6 +317,17 @@ func (storage *PublishedStorage) LinkFromPool(publishedDirectory, fileName strin
sourceMD5 := sourceChecksums.MD5
if exists {
if len(destinationMD5) != 32 {
// doesn't look like a valid MD5,
// attempt to fetch one from the metadata
var err error
destinationMD5, err = storage.getMD5(relPath)
if err != nil {
err = errors.Wrap(err, fmt.Sprintf("error verifying MD5 for %s: %s", storage, poolPath))
return err
}
storage.pathCache[relPath] = destinationMD5
}
if sourceMD5 == "" {
return fmt.Errorf("unable to compare object, MD5 checksum missing")
}