Allow disabling bzip2 compression for index files

Using bzip2 generates smaller index files (roughly 20% smaller Packages
files), but it comes with a big performance penalty.  When publishing a
Debian mirror snapshot (amd64, arm64, armhf, source) without Contents
indexes, skipping bzip2 makes publishing around 1.8 times faster.

```
$ hyperfine -w 1 -L skip-bz2 true,false  -m 3 -p "aptly -config aptly.conf publish drop bullseye || true" "aptly -config aptly.conf  publish snapshot  --skip-bz2={skip-bz2} --skip-contents --skip-signing bullseye"
Benchmark 1: aptly -config aptly.conf  publish snapshot  --skip-bz2=true --skip-contents --skip-signing bullseye
  Time (mean ± σ):     35.567 s ±  0.307 s    [User: 39.366 s, System: 10.075 s]
  Range (min … max):   35.311 s … 35.907 s    3 runs

Benchmark 2: aptly -config aptly.conf  publish snapshot  --skip-bz2=false --skip-contents --skip-signing bullseye
  Time (mean ± σ):     64.740 s ±  0.135 s    [User: 68.565 s, System: 10.129 s]
  Range (min … max):   64.596 s … 64.862 s    3 runs

Summary
  'aptly -config aptly.conf  publish snapshot  --skip-bz2=true --skip-contents --skip-signing bullseye' ran
    1.82 ± 0.02 times faster than 'aptly -config aptly.conf  publish snapshot  --skip-bz2=false --skip-contents --skip-signing bullseye'
```

Allow skipping bz2 creation for setups where faster publishing is more
important than Packages file size.

Signed-off-by: Sjoerd Simons <sjoerd@collabora.com>
This commit is contained in:
Sjoerd Simons
2022-06-18 08:29:21 +02:00
committed by Benj Fassbind
parent 2aca913e92
commit f61514edaf
21 changed files with 204 additions and 10 deletions

View File

@@ -96,6 +96,7 @@ func apiPublishRepoOrSnapshot(c *gin.Context) {
ButAutomaticUpgrades string
ForceOverwrite bool
SkipContents *bool
SkipBz2 *bool
Architectures []string
Signing SigningOptions
AcquireByHash *bool
@@ -209,6 +210,11 @@ func apiPublishRepoOrSnapshot(c *gin.Context) {
published.SkipContents = *b.SkipContents
}
// Start from the configured default; an explicit SkipBz2 value in the
// request body overrides it. The guard must test b.SkipBz2 itself: testing
// b.SkipContents would dereference a nil pointer when only SkipContents is
// sent, and would ignore SkipBz2 when it is sent on its own.
published.SkipBz2 = context.Config().SkipBz2Publishing
if b.SkipBz2 != nil {
	published.SkipBz2 = *b.SkipBz2
}
if b.AcquireByHash != nil {
published.AcquireByHash = *b.AcquireByHash
}
@@ -243,6 +249,7 @@ func apiPublishUpdateSwitch(c *gin.Context) {
ForceOverwrite bool
Signing SigningOptions
SkipContents *bool
SkipBz2 *bool
SkipCleanup *bool
Snapshots []struct {
Component string `binding:"required"`
@@ -322,6 +329,10 @@ func apiPublishUpdateSwitch(c *gin.Context) {
published.SkipContents = *b.SkipContents
}
if b.SkipBz2 != nil {
published.SkipBz2 = *b.SkipBz2
}
if b.AcquireByHash != nil {
published.AcquireByHash = *b.AcquireByHash
}

View File

@@ -42,6 +42,7 @@ Example:
cmd.Flag.Bool("batch", false, "run GPG with detached tty")
cmd.Flag.Bool("skip-signing", false, "don't sign Release files with GPG")
cmd.Flag.Bool("skip-contents", false, "don't generate Contents indexes")
cmd.Flag.Bool("skip-bz2", false, "don't generate bzipped indexes")
cmd.Flag.String("origin", "", "origin name to publish")
cmd.Flag.String("notautomatic", "", "set value for NotAutomatic field")
cmd.Flag.String("butautomaticupgrades", "", "set value for ButAutomaticUpgrades field")

View File

@@ -139,6 +139,11 @@ func aptlyPublishSnapshotOrRepo(cmd *commander.Command, args []string) error {
published.SkipContents = context.Flags().Lookup("skip-contents").Value.Get().(bool)
}
published.SkipBz2 = context.Config().SkipBz2Publishing
if context.Flags().IsSet("skip-bz2") {
published.SkipBz2 = context.Flags().Lookup("skip-bz2").Value.Get().(bool)
}
if context.Flags().IsSet("acquire-by-hash") {
published.AcquireByHash = context.Flags().Lookup("acquire-by-hash").Value.Get().(bool)
}
@@ -228,6 +233,7 @@ Example:
cmd.Flag.Bool("batch", false, "run GPG with detached tty")
cmd.Flag.Bool("skip-signing", false, "don't sign Release files with GPG")
cmd.Flag.Bool("skip-contents", false, "don't generate Contents indexes")
cmd.Flag.Bool("skip-bz2", false, "don't generate bzipped indexes")
cmd.Flag.String("origin", "", "overwrite origin name to publish")
cmd.Flag.String("notautomatic", "", "overwrite value for NotAutomatic field")
cmd.Flag.String("butautomaticupgrades", "", "overwrite value for ButAutomaticUpgrades field")

View File

@@ -96,6 +96,10 @@ func aptlyPublishSwitch(cmd *commander.Command, args []string) error {
published.SkipContents = context.Flags().Lookup("skip-contents").Value.Get().(bool)
}
if context.Flags().IsSet("skip-bz2") {
published.SkipBz2 = context.Flags().Lookup("skip-bz2").Value.Get().(bool)
}
err = published.Publish(context.PackagePool(), context, collectionFactory, signer, context.Progress(), forceOverwrite)
if err != nil {
return fmt.Errorf("unable to publish: %s", err)
@@ -153,6 +157,7 @@ This command would switch published repository (with one component) named ppa/wh
cmd.Flag.Bool("batch", false, "run GPG with detached tty")
cmd.Flag.Bool("skip-signing", false, "don't sign Release files with GPG")
cmd.Flag.Bool("skip-contents", false, "don't generate Contents indexes")
cmd.Flag.Bool("skip-bz2", false, "don't generate bzipped indexes")
cmd.Flag.String("component", "", "component names to update (for multi-component publishing, separate components with commas)")
cmd.Flag.Bool("force-overwrite", false, "overwrite files in package pool in case of mismatch")
cmd.Flag.Bool("skip-cleanup", false, "don't remove unreferenced files in prefix/component")

View File

@@ -60,6 +60,10 @@ func aptlyPublishUpdate(cmd *commander.Command, args []string) error {
published.SkipContents = context.Flags().Lookup("skip-contents").Value.Get().(bool)
}
if context.Flags().IsSet("skip-bz2") {
published.SkipBz2 = context.Flags().Lookup("skip-bz2").Value.Get().(bool)
}
err = published.Publish(context.PackagePool(), context, collectionFactory, signer, context.Progress(), forceOverwrite)
if err != nil {
return fmt.Errorf("unable to publish: %s", err)
@@ -112,6 +116,7 @@ Example:
cmd.Flag.Bool("batch", false, "run GPG with detached tty")
cmd.Flag.Bool("skip-signing", false, "don't sign Release files with GPG")
cmd.Flag.Bool("skip-contents", false, "don't generate Contents indexes")
cmd.Flag.Bool("skip-bz2", false, "don't generate bzipped indexes")
cmd.Flag.Bool("force-overwrite", false, "overwrite files in package pool in case of mismatch")
cmd.Flag.Bool("skip-cleanup", false, "don't remove unreferenced files in prefix/component")

View File

@@ -446,6 +446,7 @@ local keyring="*-keyring=[gpg keyring to use when verifying Release file (could
"-passphrase-file=[GPG passphrasefile for the key (warning: could be insecure)]:passphrase file:_files"
"-secret-keyring=[GPG secret keyring to use (instead of default)]:secret-keyring:_files"
"-skip-contents=[dont generate Contents indexes]:$bool"
"-skip-bz2=[don't generate bzipped indexes]:$bool"
"-skip-signing=[dont sign Release files with GPG]:$bool"
)
local components_options=(

View File

@@ -503,7 +503,7 @@ _aptly()
"snapshot"|"repo")
if [[ $numargs -eq 0 ]]; then
if [[ "$cur" == -* ]]; then
COMPREPLY=($(compgen -W "-acquire-by-hash -batch -butautomaticupgrades= -component= -distribution= -force-overwrite -gpg-key= -keyring= -label= -suite= -notautomatic= -origin= -passphrase= -passphrase-file= -secret-keyring= -skip-contents -skip-signing" -- ${cur}))
COMPREPLY=($(compgen -W "-acquire-by-hash -batch -butautomaticupgrades= -component= -distribution= -force-overwrite -gpg-key= -keyring= -label= -suite= -notautomatic= -origin= -passphrase= -passphrase-file= -secret-keyring= -skip-contents -skip-bz2 -skip-signing" -- ${cur}))
else
if [[ "$subcmd" == "snapshot" ]]; then
COMPREPLY=($(compgen -W "$(__aptly_snapshot_list)" -- ${cur}))
@@ -528,7 +528,7 @@ _aptly()
"update")
if [[ $numargs -eq 0 ]]; then
if [[ "$cur" == -* ]]; then
COMPREPLY=($(compgen -W "-batch -force-overwrite -gpg-key= -keyring= -passphrase= -passphrase-file= -secret-keyring= -skip-cleanup -skip-contents -skip-signing" -- ${cur}))
COMPREPLY=($(compgen -W "-batch -force-overwrite -gpg-key= -keyring= -passphrase= -passphrase-file= -secret-keyring= -skip-cleanup -skip-contents -skip-bz2 -skip-signing" -- ${cur}))
else
COMPREPLY=($(compgen -W "$(__aptly_published_distributions)" -- ${cur}))
fi
@@ -543,7 +543,7 @@ _aptly()
"switch")
if [[ $numargs -eq 0 ]]; then
if [[ "$cur" == -* ]]; then
COMPREPLY=($(compgen -W "-batch -force-overwrite -component= -gpg-key= -keyring= -passphrase= -passphrase-file= -secret-keyring= -skip-cleanup -skip-contents -skip-signing" -- ${cur}))
COMPREPLY=($(compgen -W "-batch -force-overwrite -component= -gpg-key= -keyring= -passphrase= -passphrase-file= -secret-keyring= -skip-cleanup -skip-contents -skip-bz2 -skip-signing" -- ${cur}))
else
COMPREPLY=($(compgen -W "$(__aptly_published_distributions)" -- ${cur}))
fi

View File

@@ -22,6 +22,7 @@ type indexFiles struct {
suffix string
indexes map[string]*indexFile
acquireByHash bool
skipBz2 bool
}
type indexFile struct {
@@ -68,7 +69,7 @@ func (file *indexFile) Finalize(signer pgp.Signer) error {
}
if file.compressable {
err = utils.CompressFile(file.tempFile, file.onlyGzip)
err = utils.CompressFile(file.tempFile, file.onlyGzip || file.parent.skipBz2)
if err != nil {
file.tempFile.Close()
return fmt.Errorf("unable to compress index file: %s", err)
@@ -80,11 +81,15 @@ func (file *indexFile) Finalize(signer pgp.Signer) error {
exts := []string{""}
cksumExts := exts
if file.compressable {
exts = append(exts, ".gz", ".bz2")
cksumExts = exts
if file.onlyGzip {
exts = []string{".gz"}
cksumExts = []string{"", ".gz"}
} else {
exts = append(exts, ".gz")
if !file.parent.skipBz2 {
exts = append(exts, ".bz2")
}
cksumExts = exts
}
}
@@ -229,7 +234,7 @@ func packageIndexByHash(file *indexFile, ext string, hash string, sum string) er
return nil
}
func newIndexFiles(publishedStorage aptly.PublishedStorage, basePath, tempDir, suffix string, acquireByHash bool) *indexFiles {
func newIndexFiles(publishedStorage aptly.PublishedStorage, basePath, tempDir, suffix string, acquireByHash bool, skipBz2 bool) *indexFiles {
return &indexFiles{
publishedStorage: publishedStorage,
basePath: basePath,
@@ -239,6 +244,7 @@ func newIndexFiles(publishedStorage aptly.PublishedStorage, basePath, tempDir, s
suffix: suffix,
indexes: make(map[string]*indexFile),
acquireByHash: acquireByHash,
skipBz2: skipBz2,
}
}

View File

@@ -62,6 +62,9 @@ type PublishedRepo struct {
// Skip contents generation
SkipContents bool
// Skip bz2 compression for index files
SkipBz2 bool
// True if repo is being re-published
rePublishing bool
@@ -585,7 +588,7 @@ func (p *PublishedRepo) Publish(packagePool aptly.PackagePool, publishedStorageP
}
defer os.RemoveAll(tempDir)
indexes := newIndexFiles(publishedStorage, basePath, tempDir, suffix, p.AcquireByHash)
indexes := newIndexFiles(publishedStorage, basePath, tempDir, suffix, p.AcquireByHash, p.SkipBz2)
legacyContentIndexes := map[string]*ContentsIndex{}
var count int64

View File

@@ -19,6 +19,7 @@
"ppaDistributorID": "ubuntu",
"ppaCodename": "",
"skipContentsPublishing": false,
"skipBz2Publishing": false,
"FileSystemPublishEndpoints": {},
"S3PublishEndpoints": {},
"SwiftPublishEndpoints": {},

View File

@@ -19,6 +19,7 @@
"ppaDistributorID": "ubuntu",
"ppaCodename": "",
"skipContentsPublishing": false,
"skipBz2Publishing": false,
"FileSystemPublishEndpoints": {},
"S3PublishEndpoints": {},
"SwiftPublishEndpoints": {},

View File

@@ -0,0 +1,14 @@
Loading packages...
Generating metadata files and linking package files...
Finalizing metadata files...
Signing file 'Release' with gpg, please enter your passphrase when prompted:
Clearsigning file 'Release' with gpg, please enter your passphrase when prompted:
Local repo local-repo has been successfully published.
Please setup your webserver to serve directory '${HOME}/.aptly/public' with autoindexing.
Now you can add following line to apt sources:
deb http://your-server/ maverick main
deb-src http://your-server/ maverick main
Don't forget to add your GPG key to apt with apt-key.
You can also use `aptly serve` to publish your repositories over HTTP quickly.

View File

@@ -0,0 +1,13 @@
Loading packages...
Generating metadata files and linking package files...
Finalizing metadata files...
Signing file 'Release' with gpg, please enter your passphrase when prompted:
Clearsigning file 'Release' with gpg, please enter your passphrase when prompted:
Snapshot snap40 has been successfully published.
Please setup your webserver to serve directory '${HOME}/.aptly/public' with autoindexing.
Now you can add following line to apt sources:
deb http://your-server/ maverick main
Don't forget to add your GPG key to apt with apt-key.
You can also use `aptly serve` to publish your repositories over HTTP quickly.

View File

@@ -0,0 +1,8 @@
Loading packages...
Generating metadata files and linking package files...
Finalizing metadata files...
Signing file 'Release' with gpg, please enter your passphrase when prompted:
Clearsigning file 'Release' with gpg, please enter your passphrase when prompted:
Cleaning up prefix "." components main...
Publish for snapshot ./maverick (origin: LP-PPA-gladky-anton-gnuplot) [amd64, i386] publishes {main: [snap3]: Pulled into 'snap2' with 'snap1' as source, pull request was: 'gnuplot-x11'} has been successfully switched to new snapshot.

View File

@@ -0,0 +1,8 @@
Loading packages...
Generating metadata files and linking package files...
Finalizing metadata files...
Signing file 'Release' with gpg, please enter your passphrase when prompted:
Clearsigning file 'Release' with gpg, please enter your passphrase when prompted:
Cleaning up prefix "." components main...
Publish for local repo ./maverick [i386, source] publishes {main: [local-repo]} has been successfully updated.

View File

@@ -29,8 +29,7 @@ class PublishRepo1Test(BaseTest):
self.check_exists('public/dists/maverick/main/binary-i386/Packages')
self.check_exists('public/dists/maverick/main/binary-i386/Packages.gz')
self.check_exists(
'public/dists/maverick/main/binary-i386/Packages.bz2')
self.check_exists('public/dists/maverick/main/binary-i386/Packages.bz2')
self.check_exists('public/dists/maverick/main/Contents-i386.gz')
self.check_exists('public/dists/maverick/main/source/Sources')
self.check_exists('public/dists/maverick/main/source/Sources.gz')
@@ -862,3 +861,30 @@ class PublishRepo32Test(BaseTest):
"--verify", os.path.join(
os.environ["HOME"], ".aptly", 'public/dists/maverick/Release.gpg'),
os.path.join(os.environ["HOME"], ".aptly", 'public/dists/maverick/Release')])
class PublishRepo33Test(BaseTest):
    """
    publish repo: -skip-bz2
    """
    # Build a local repo containing both regular packages and udebs.
    fixtureCmds = [
        "aptly repo create local-repo",
        "aptly repo add local-repo ${files} ${udebs}",
    ]
    runCmd = (
        "aptly publish repo -keyring=${files}/aptly.pub -secret-keyring=${files}/aptly.sec "
        "-distribution=maverick -skip-bz2 local-repo"
    )
    gold_processor = BaseTest.expand_environ

    def check(self):
        super(PublishRepo33Test, self).check()

        dist_root = 'public/dists/maverick/'
        self.check_exists(dist_root + 'Release')

        # For every binary architecture the plain and gzipped indexes must be
        # published, while the bzip2 variant must be absent.
        for arch in ('i386', 'amd64'):
            binary_dir = dist_root + 'main/binary-' + arch + '/'
            self.check_exists(binary_dir + 'Release')
            self.check_exists(binary_dir + 'Packages')
            self.check_exists(binary_dir + 'Packages.gz')
            self.check_not_exists(binary_dir + 'Packages.bz2')

View File

@@ -1210,3 +1210,32 @@ class PublishSnapshot39Test(BaseTest):
'contents_i386', match_prepare=ungzip_if_required, mode='b', ensure_utf8=False)
self.check_file_contents('public/dists/maverick/main/Contents-amd64.gz',
'contents_amd64', match_prepare=ungzip_if_required, mode='b', ensure_utf8=False)
class PublishSnapshot40Test(BaseTest):
    """
    publish snapshot: -skip-bz2
    """
    fixtureDB = True
    fixturePool = True
    fixtureCmds = [
        "aptly snapshot create snap40 from mirror gnuplot-maverick",
    ]
    runCmd = (
        "aptly publish snapshot -keyring=${files}/aptly.pub -secret-keyring=${files}/aptly.sec "
        "-skip-bz2 snap40"
    )
    gold_processor = BaseTest.expand_environ

    def check(self):
        super(PublishSnapshot40Test, self).check()

        dist_root = 'public/dists/maverick/'
        arches = ('i386', 'amd64')

        # Top-level release metadata.
        for name in ('Release', 'Release.gpg'):
            self.check_exists(dist_root + name)

        # Per-architecture Release files.
        for arch in arches:
            self.check_exists(dist_root + 'main/binary-' + arch + '/Release')

        # Packages indexes: plain and gzip present, bzip2 skipped.
        for arch in arches:
            packages = dist_root + 'main/binary-' + arch + '/Packages'
            self.check_exists(packages)
            self.check_exists(packages + '.gz')
            self.check_not_exists(packages + '.bz2')

View File

@@ -545,3 +545,32 @@ class PublishSwitch14Test(BaseTest):
'main/binary-amd64/Release', 'main/binary-i386/Release', 'main/Contents-amd64.gz',
'main/Contents-i386.gz', 'Contents-i386.gz', 'Contents-amd64.gz']):
raise Exception("path seen wrong: %r" % (pathsSeen, ))
class PublishSwitch15Test(BaseTest):
    """
    publish switch: -skip-bz2
    """
    fixtureDB = True
    fixturePool = True
    # The initial publish is done with -skip-bz2; the switch command under
    # test is run without the flag.
    fixtureCmds = [
        "aptly snapshot create snap1 from mirror gnuplot-maverick",
        "aptly snapshot create snap2 empty",
        "aptly snapshot pull -no-deps -architectures=i386,amd64 snap2 snap1 snap3 gnuplot-x11",
        (
            "aptly publish snapshot -keyring=${files}/aptly.pub -secret-keyring=${files}/aptly.sec "
            "-distribution=maverick -skip-bz2 snap1"
        ),
    ]
    runCmd = (
        "aptly publish switch -keyring=${files}/aptly.pub -secret-keyring=${files}/aptly.sec "
        "maverick snap3"
    )
    gold_processor = BaseTest.expand_environ

    def check(self):
        super(PublishSwitch15Test, self).check()

        dist_root = 'public/dists/maverick/'
        self.check_exists(dist_root + 'Release')

        # After the switch the bzip2 index must still be absent for each arch.
        for arch in ('i386', 'amd64'):
            packages = dist_root + 'main/binary-' + arch + '/Packages'
            self.check_exists(packages)
            self.check_exists(packages + '.gz')
            self.check_not_exists(packages + '.bz2')

View File

@@ -437,3 +437,28 @@ class PublishUpdate12Test(BaseTest):
'main/binary-i386/Release', 'main/source/Release', 'main/Contents-i386.gz',
'Contents-i386.gz']):
raise Exception("path seen wrong: %r" % (pathsSeen, ))
class PublishUpdate13Test(BaseTest):
    """
    publish update: -skip-bz2
    """
    # Publish the repo with -skip-bz2, then remove a package so the update
    # has something to republish.
    fixtureCmds = [
        "aptly repo create local-repo",
        "aptly repo add local-repo ${files}/",
        (
            "aptly publish repo -keyring=${files}/aptly.pub -secret-keyring=${files}/aptly.sec "
            "-distribution=maverick -skip-bz2 local-repo"
        ),
        "aptly repo remove local-repo pyspi"
    ]
    runCmd = (
        "aptly publish update -keyring=${files}/aptly.pub -secret-keyring=${files}/aptly.sec "
        "-skip-bz2 maverick"
    )
    gold_processor = BaseTest.expand_environ

    def check(self):
        super(PublishUpdate13Test, self).check()

        dist_root = 'public/dists/maverick/'

        # Signed and unsigned release metadata.
        for name in ('InRelease', 'Release', 'Release.gpg'):
            self.check_exists(dist_root + name)

        # Plain and gzipped Packages indexes exist; the bzip2 one does not.
        packages = dist_root + 'main/binary-i386/Packages'
        self.check_exists(packages)
        self.check_exists(packages + '.gz')
        self.check_not_exists(packages + '.bz2')

View File

@@ -28,6 +28,7 @@ type ConfigStructure struct { // nolint: maligned
PpaDistributorID string `json:"ppaDistributorID"`
PpaCodename string `json:"ppaCodename"`
SkipContentsPublishing bool `json:"skipContentsPublishing"`
SkipBz2Publishing bool `json:"skipBz2Publishing"`
FileSystemPublishRoots map[string]FileSystemPublishRoot `json:"FileSystemPublishEndpoints"`
S3PublishRoots map[string]S3PublishRoot `json:"S3PublishEndpoints"`
SwiftPublishRoots map[string]SwiftPublishRoot `json:"SwiftPublishEndpoints"`

View File

@@ -79,6 +79,7 @@ func (s *ConfigSuite) TestSaveConfig(c *C) {
" \"ppaDistributorID\": \"\",\n"+
" \"ppaCodename\": \"\",\n"+
" \"skipContentsPublishing\": false,\n"+
" \"skipBz2Publishing\": false,\n"+
" \"FileSystemPublishEndpoints\": {\n"+
" \"test\": {\n"+
" \"rootDir\": \"/opt/aptly-publish\",\n"+