Files
aptly/deb/remote.go
T
Andrey Smirnov 0f4bbc4752 Implement lazy iteration (ForEach) over collections
See #761

aptly had a concept of loading small amount of info per each object
into memory once collection is accessed for the first time.

This might have simplified some operations, but it doesn't scale well
with huge aptly databases.

This is just intermediate step towards better memory management -
list of objects is not loaded unless some method is called.
`ForEach` method (mainly used in cleanup) is reimplemented to
iterate over database without ever loading all the objects into memory.

Memory was even worse with previous approach, as for each item usually
`LoadComplete()` is called, which pulls even more data into memory
and item stays in memory till the end of the iteration as it is referenced
from `collection.list`.

For the subsequent PR: reimplement `ByUUID()` and probably other methods
to avoid loading all the items into memory, at least for all the collecitons
except for published repos. When published repository is being loaded, it
might pull source local repo which in turn would trigger loading for all the
local repos which is not acceptable.
2018-08-04 00:26:02 +03:00

806 lines
21 KiB
Go

package deb
import (
"bytes"
gocontext "context"
"fmt"
"log"
"net/url"
"os"
"path"
"sort"
"strconv"
"strings"
"sync"
"syscall"
"time"
"github.com/aptly-dev/aptly/aptly"
"github.com/aptly-dev/aptly/database"
"github.com/aptly-dev/aptly/http"
"github.com/aptly-dev/aptly/pgp"
"github.com/aptly-dev/aptly/utils"
"github.com/pborman/uuid"
"github.com/ugorji/go/codec"
)
// RemoteRepo statuses
const (
MirrorIdle = iota
MirrorUpdating
)
// RemoteRepo represents remote (fetchable) Debian repository.
//
// Repostitory could be filtered when fetching by components, architectures
type RemoteRepo struct {
// Permanent internal ID
UUID string
// User-assigned name
Name string
// Root of Debian archive, URL
ArchiveRoot string
// Distribution name, e.g. squeeze
Distribution string
// List of components to fetch, if empty, then fetch all components
Components []string
// List of architectures to fetch, if empty, then fetch all architectures
Architectures []string
// Meta-information about repository
Meta Stanza
// Last update date
LastDownloadDate time.Time
// Checksums for release files
ReleaseFiles map[string]utils.ChecksumInfo
// Filter for packages
Filter string
// Status marks state of repository (being updated, no action)
Status int
// WorkerPID is PID of the process modifying the mirror (if any)
WorkerPID int
// FilterWithDeps to include dependencies from filter query
FilterWithDeps bool
// SkipComponentCheck skips component list verification
SkipComponentCheck bool
// SkipArchitectureCheck skips architecture list verification
SkipArchitectureCheck bool
// Should we download sources?
DownloadSources bool
// Should we download .udebs?
DownloadUdebs bool
// "Snapshot" of current list of packages
packageRefs *PackageRefList
// Parsed archived root
archiveRootURL *url.URL
// Current list of packages (filled while updating mirror)
packageList *PackageList
}
// NewRemoteRepo creates new instance of Debian remote repository with specified params
func NewRemoteRepo(name string, archiveRoot string, distribution string, components []string,
architectures []string, downloadSources bool, downloadUdebs bool) (*RemoteRepo, error) {
result := &RemoteRepo{
UUID: uuid.New(),
Name: name,
ArchiveRoot: archiveRoot,
Distribution: distribution,
Components: components,
Architectures: architectures,
DownloadSources: downloadSources,
DownloadUdebs: downloadUdebs,
}
err := result.prepare()
if err != nil {
return nil, err
}
if strings.HasSuffix(result.Distribution, "/") || strings.HasPrefix(result.Distribution, ".") {
// flat repo
if !strings.HasPrefix(result.Distribution, ".") {
result.Distribution = "./" + result.Distribution
}
result.Architectures = nil
if len(result.Components) > 0 {
return nil, fmt.Errorf("components aren't supported for flat repos")
}
if result.DownloadUdebs {
return nil, fmt.Errorf("debian-installer udebs aren't supported for flat repos")
}
result.Components = nil
}
return result, nil
}
// SetArchiveRoot of remote repo
func (repo *RemoteRepo) SetArchiveRoot(archiveRoot string) {
repo.ArchiveRoot = archiveRoot
repo.prepare()
}
func (repo *RemoteRepo) prepare() error {
var err error
// Add final / to URL
if !strings.HasSuffix(repo.ArchiveRoot, "/") {
repo.ArchiveRoot = repo.ArchiveRoot + "/"
}
repo.archiveRootURL, err = url.Parse(repo.ArchiveRoot)
return err
}
// String interface
func (repo *RemoteRepo) String() string {
srcFlag := ""
if repo.DownloadSources {
srcFlag += " [src]"
}
if repo.DownloadUdebs {
srcFlag += " [udeb]"
}
distribution := repo.Distribution
if distribution == "" {
distribution = "./"
}
return fmt.Sprintf("[%s]: %s %s%s", repo.Name, repo.ArchiveRoot, distribution, srcFlag)
}
// IsFlat determines if repository is flat
func (repo *RemoteRepo) IsFlat() bool {
// aptly < 0.5.1 had Distribution = "" for flat repos
// aptly >= 0.5.1 had Distribution = "./[path]/" for flat repos
return repo.Distribution == "" || (strings.HasPrefix(repo.Distribution, ".") && strings.HasSuffix(repo.Distribution, "/"))
}
// NumPackages return number of packages retrieved from remote repo
func (repo *RemoteRepo) NumPackages() int {
if repo.packageRefs == nil {
return 0
}
return repo.packageRefs.Len()
}
// RefList returns package list for repo
func (repo *RemoteRepo) RefList() *PackageRefList {
return repo.packageRefs
}
// MarkAsUpdating puts current PID and sets status to updating
func (repo *RemoteRepo) MarkAsUpdating() {
repo.Status = MirrorUpdating
repo.WorkerPID = os.Getpid()
}
// MarkAsIdle clears updating flag
func (repo *RemoteRepo) MarkAsIdle() {
repo.Status = MirrorIdle
repo.WorkerPID = 0
}
// CheckLock returns error if mirror is being updated by another process
func (repo *RemoteRepo) CheckLock() error {
if repo.Status == MirrorIdle || repo.WorkerPID == 0 {
return nil
}
p, err := os.FindProcess(repo.WorkerPID)
if err != nil {
return nil
}
err = p.Signal(syscall.Signal(0))
if err == nil {
return fmt.Errorf("mirror is locked by update operation, PID %d", repo.WorkerPID)
}
return nil
}
// IndexesRootURL builds URL for various indexes
func (repo *RemoteRepo) IndexesRootURL() *url.URL {
var path *url.URL
if !repo.IsFlat() {
path = &url.URL{Path: fmt.Sprintf("dists/%s/", repo.Distribution)}
} else {
path = &url.URL{Path: repo.Distribution}
}
return repo.archiveRootURL.ResolveReference(path)
}
// ReleaseURL returns URL to Release* files in repo root
func (repo *RemoteRepo) ReleaseURL(name string) *url.URL {
return repo.IndexesRootURL().ResolveReference(&url.URL{Path: name})
}
// FlatBinaryPath returns path to Packages files for flat repo
func (repo *RemoteRepo) FlatBinaryPath() string {
return "Packages"
}
// FlatSourcesPath returns path to Sources files for flat repo
func (repo *RemoteRepo) FlatSourcesPath() string {
return "Sources"
}
// BinaryPath returns path to Packages files for given component and
// architecture
func (repo *RemoteRepo) BinaryPath(component string, architecture string) string {
return fmt.Sprintf("%s/binary-%s/Packages", component, architecture)
}
// SourcesPath returns path to Sources files for given component
func (repo *RemoteRepo) SourcesPath(component string) string {
return fmt.Sprintf("%s/source/Sources", component)
}
// UdebPath returns path of Packages files for given component and
// architecture
func (repo *RemoteRepo) UdebPath(component string, architecture string) string {
return fmt.Sprintf("%s/debian-installer/binary-%s/Packages", component, architecture)
}
// PackageURL returns URL of package file relative to repository root
// architecture
func (repo *RemoteRepo) PackageURL(filename string) *url.URL {
path := &url.URL{Path: filename}
return repo.archiveRootURL.ResolveReference(path)
}
// Fetch updates information about repository
func (repo *RemoteRepo) Fetch(d aptly.Downloader, verifier pgp.Verifier) error {
var (
release, inrelease, releasesig *os.File
err error
)
if verifier == nil {
// 0. Just download release file to temporary URL
release, err = http.DownloadTemp(gocontext.TODO(), d, repo.ReleaseURL("Release").String())
if err != nil {
return err
}
} else {
// 1. try InRelease file
inrelease, err = http.DownloadTemp(gocontext.TODO(), d, repo.ReleaseURL("InRelease").String())
if err != nil {
goto splitsignature
}
defer inrelease.Close()
_, err = verifier.VerifyClearsigned(inrelease, true)
if err != nil {
goto splitsignature
}
inrelease.Seek(0, 0)
release, err = verifier.ExtractClearsigned(inrelease)
if err != nil {
goto splitsignature
}
goto ok
splitsignature:
// 2. try Release + Release.gpg
release, err = http.DownloadTemp(gocontext.TODO(), d, repo.ReleaseURL("Release").String())
if err != nil {
return err
}
releasesig, err = http.DownloadTemp(gocontext.TODO(), d, repo.ReleaseURL("Release.gpg").String())
if err != nil {
return err
}
err = verifier.VerifyDetachedSignature(releasesig, release, true)
if err != nil {
return err
}
_, err = release.Seek(0, 0)
if err != nil {
return err
}
}
ok:
defer release.Close()
sreader := NewControlFileReader(release)
stanza, err := sreader.ReadStanza(true)
if err != nil {
return err
}
if !repo.IsFlat() {
architectures := strings.Split(stanza["Architectures"], " ")
sort.Strings(architectures)
// "source" architecture is never present, despite Release file claims
architectures = utils.StrSlicesSubstract(architectures, []string{ArchitectureSource})
if len(repo.Architectures) == 0 {
repo.Architectures = architectures
} else if !repo.SkipArchitectureCheck {
err = utils.StringsIsSubset(repo.Architectures, architectures,
fmt.Sprintf("architecture %%s not available in repo %s, use -force-architectures to override", repo))
if err != nil {
return err
}
}
components := strings.Split(stanza["Components"], " ")
if strings.Contains(repo.Distribution, "/") {
distributionLast := path.Base(repo.Distribution) + "/"
for i := range components {
components[i] = strings.TrimPrefix(components[i], distributionLast)
}
}
if len(repo.Components) == 0 {
repo.Components = components
} else if !repo.SkipComponentCheck {
err = utils.StringsIsSubset(repo.Components, components,
fmt.Sprintf("component %%s not available in repo %s, use -force-components to override", repo))
if err != nil {
return err
}
}
}
repo.ReleaseFiles = make(map[string]utils.ChecksumInfo)
parseSums := func(field string, setter func(sum *utils.ChecksumInfo, data string)) error {
for _, line := range strings.Split(stanza[field], "\n") {
line = strings.TrimSpace(line)
if line == "" {
continue
}
parts := strings.Fields(line)
if len(parts) != 3 {
return fmt.Errorf("unparseable hash sum line: %#v", line)
}
var size int64
size, err = strconv.ParseInt(parts[1], 10, 64)
if err != nil {
return fmt.Errorf("unable to parse size: %s", err)
}
sum := repo.ReleaseFiles[parts[2]]
sum.Size = size
setter(&sum, parts[0])
repo.ReleaseFiles[parts[2]] = sum
}
delete(stanza, field)
return nil
}
err = parseSums("MD5Sum", func(sum *utils.ChecksumInfo, data string) { sum.MD5 = data })
if err != nil {
return err
}
err = parseSums("SHA1", func(sum *utils.ChecksumInfo, data string) { sum.SHA1 = data })
if err != nil {
return err
}
err = parseSums("SHA256", func(sum *utils.ChecksumInfo, data string) { sum.SHA256 = data })
if err != nil {
return err
}
err = parseSums("SHA512", func(sum *utils.ChecksumInfo, data string) { sum.SHA512 = data })
if err != nil {
return err
}
repo.Meta = stanza
return nil
}
// DownloadPackageIndexes downloads & parses package index files
func (repo *RemoteRepo) DownloadPackageIndexes(progress aptly.Progress, d aptly.Downloader, collectionFactory *CollectionFactory,
ignoreMismatch bool, maxTries int) error {
if repo.packageList != nil {
panic("packageList != nil")
}
repo.packageList = NewPackageList()
// Download and parse all Packages & Source files
packagesPaths := [][]string{}
if repo.IsFlat() {
packagesPaths = append(packagesPaths, []string{repo.FlatBinaryPath(), PackageTypeBinary})
if repo.DownloadSources {
packagesPaths = append(packagesPaths, []string{repo.FlatSourcesPath(), PackageTypeSource})
}
} else {
for _, component := range repo.Components {
for _, architecture := range repo.Architectures {
packagesPaths = append(packagesPaths, []string{repo.BinaryPath(component, architecture), PackageTypeBinary})
if repo.DownloadUdebs {
packagesPaths = append(packagesPaths, []string{repo.UdebPath(component, architecture), PackageTypeUdeb})
}
}
if repo.DownloadSources {
packagesPaths = append(packagesPaths, []string{repo.SourcesPath(component), PackageTypeSource})
}
}
}
for _, info := range packagesPaths {
path, kind := info[0], info[1]
packagesReader, packagesFile, err := http.DownloadTryCompression(gocontext.TODO(), d, repo.IndexesRootURL(), path, repo.ReleaseFiles, ignoreMismatch, maxTries)
if err != nil {
return err
}
defer packagesFile.Close()
stat, _ := packagesFile.Stat()
progress.InitBar(stat.Size(), true)
sreader := NewControlFileReader(packagesReader)
for {
stanza, err := sreader.ReadStanza(false)
if err != nil {
return err
}
if stanza == nil {
break
}
off, _ := packagesFile.Seek(0, 1)
progress.SetBar(int(off))
var p *Package
if kind == PackageTypeBinary {
p = NewPackageFromControlFile(stanza)
} else if kind == PackageTypeUdeb {
p = NewUdebPackageFromControlFile(stanza)
} else if kind == PackageTypeSource {
p, err = NewSourcePackageFromControlFile(stanza)
if err != nil {
return err
}
}
err = repo.packageList.Add(p)
if err != nil {
if _, ok := err.(*PackageConflictError); ok {
progress.ColoredPrintf("@y[!]@| @!skipping package %s: duplicate in packages index@|", p)
} else {
return err
}
}
}
progress.ShutdownBar()
}
return nil
}
// ApplyFilter applies filtering to already built PackageList
func (repo *RemoteRepo) ApplyFilter(dependencyOptions int, filterQuery PackageQuery, progress aptly.Progress) (oldLen, newLen int, err error) {
repo.packageList.PrepareIndex()
emptyList := NewPackageList()
emptyList.PrepareIndex()
oldLen = repo.packageList.Len()
repo.packageList, err = repo.packageList.FilterWithProgress([]PackageQuery{filterQuery}, repo.FilterWithDeps, emptyList, dependencyOptions, repo.Architectures, progress)
if repo.packageList != nil {
newLen = repo.packageList.Len()
}
return
}
// BuildDownloadQueue builds queue, discards current PackageList
func (repo *RemoteRepo) BuildDownloadQueue(packagePool aptly.PackagePool, packageCollection *PackageCollection, checksumStorage aptly.ChecksumStorage, skipExistingPackages bool) (queue []PackageDownloadTask, downloadSize int64, err error) {
queue = make([]PackageDownloadTask, 0, repo.packageList.Len())
seen := make(map[string]int, repo.packageList.Len())
err = repo.packageList.ForEach(func(p *Package) error {
if repo.packageRefs != nil && skipExistingPackages {
if repo.packageRefs.Has(p) {
// skip this package, but load checksums/files from package in DB
var prevP *Package
prevP, err = packageCollection.ByKey(p.Key(""))
if err != nil {
return err
}
p.UpdateFiles(prevP.Files())
return nil
}
}
list, err2 := p.DownloadList(packagePool, checksumStorage)
if err2 != nil {
return err2
}
for _, task := range list {
key := task.File.DownloadURL()
idx, found := seen[key]
if !found {
queue = append(queue, task)
downloadSize += task.File.Checksums.Size
seen[key] = len(queue) - 1
} else {
// hook up the task to duplicate entry already on the list
queue[idx].Additional = append(queue[idx].Additional, task)
}
}
return nil
})
if err != nil {
return
}
return
}
// FinalizeDownload swaps for final value of package refs
func (repo *RemoteRepo) FinalizeDownload(collectionFactory *CollectionFactory, progress aptly.Progress) error {
repo.LastDownloadDate = time.Now()
if progress != nil {
progress.InitBar(int64(repo.packageList.Len()), true)
}
var i int
// update all the packages in collection
err := repo.packageList.ForEach(func(p *Package) error {
i++
if progress != nil {
progress.SetBar(i)
}
// download process might have updated checksums
p.UpdateFiles(p.Files())
return collectionFactory.PackageCollection().Update(p)
})
repo.packageRefs = NewPackageRefListFromPackageList(repo.packageList)
if progress != nil {
progress.ShutdownBar()
}
repo.packageList = nil
return err
}
// Encode does msgpack encoding of RemoteRepo
func (repo *RemoteRepo) Encode() []byte {
var buf bytes.Buffer
encoder := codec.NewEncoder(&buf, &codec.MsgpackHandle{})
encoder.Encode(repo)
return buf.Bytes()
}
// Decode decodes msgpack representation into RemoteRepo
func (repo *RemoteRepo) Decode(input []byte) error {
decoder := codec.NewDecoderBytes(input, &codec.MsgpackHandle{})
err := decoder.Decode(repo)
if err != nil {
if strings.HasPrefix(err.Error(), "codec.decoder: readContainerLen: Unrecognized descriptor byte: hex: 80") {
// probably it is broken DB from go < 1.2, try decoding w/o time.Time
var repo11 struct { // nolint: maligned
UUID string
Name string
ArchiveRoot string
Distribution string
Components []string
Architectures []string
DownloadSources bool
Meta Stanza
LastDownloadDate []byte
ReleaseFiles map[string]utils.ChecksumInfo
Filter string
FilterWithDeps bool
}
decoder = codec.NewDecoderBytes(input, &codec.MsgpackHandle{})
err2 := decoder.Decode(&repo11)
if err2 != nil {
return err
}
repo.UUID = repo11.UUID
repo.Name = repo11.Name
repo.ArchiveRoot = repo11.ArchiveRoot
repo.Distribution = repo11.Distribution
repo.Components = repo11.Components
repo.Architectures = repo11.Architectures
repo.DownloadSources = repo11.DownloadSources
repo.Meta = repo11.Meta
repo.ReleaseFiles = repo11.ReleaseFiles
repo.Filter = repo11.Filter
repo.FilterWithDeps = repo11.FilterWithDeps
} else {
return err
}
}
return repo.prepare()
}
// Key is a unique id in DB
func (repo *RemoteRepo) Key() []byte {
return []byte("R" + repo.UUID)
}
// RefKey is a unique id for package reference list
func (repo *RemoteRepo) RefKey() []byte {
return []byte("E" + repo.UUID)
}
// RemoteRepoCollection does listing, updating/adding/deleting of RemoteRepos
type RemoteRepoCollection struct {
*sync.RWMutex
db database.Storage
list []*RemoteRepo
}
// NewRemoteRepoCollection loads RemoteRepos from DB and makes up collection
func NewRemoteRepoCollection(db database.Storage) *RemoteRepoCollection {
return &RemoteRepoCollection{
RWMutex: &sync.RWMutex{},
db: db,
}
}
func (collection *RemoteRepoCollection) loadList() {
if collection.list != nil {
return
}
blobs := collection.db.FetchByPrefix([]byte("R"))
collection.list = make([]*RemoteRepo, 0, len(blobs))
for _, blob := range blobs {
r := &RemoteRepo{}
if err := r.Decode(blob); err != nil {
log.Printf("Error decoding mirror: %s\n", err)
} else {
collection.list = append(collection.list, r)
}
}
}
// Add appends new repo to collection and saves it
func (collection *RemoteRepoCollection) Add(repo *RemoteRepo) error {
collection.loadList()
for _, r := range collection.list {
if r.Name == repo.Name {
return fmt.Errorf("mirror with name %s already exists", repo.Name)
}
}
err := collection.Update(repo)
if err != nil {
return err
}
collection.list = append(collection.list, repo)
return nil
}
// Update stores updated information about repo in DB
func (collection *RemoteRepoCollection) Update(repo *RemoteRepo) error {
err := collection.db.Put(repo.Key(), repo.Encode())
if err != nil {
return err
}
if repo.packageRefs != nil {
err = collection.db.Put(repo.RefKey(), repo.packageRefs.Encode())
if err != nil {
return err
}
}
return nil
}
// LoadComplete loads additional information for remote repo
func (collection *RemoteRepoCollection) LoadComplete(repo *RemoteRepo) error {
encoded, err := collection.db.Get(repo.RefKey())
if err == database.ErrNotFound {
return nil
}
if err != nil {
return err
}
repo.packageRefs = &PackageRefList{}
return repo.packageRefs.Decode(encoded)
}
// ByName looks up repository by name
func (collection *RemoteRepoCollection) ByName(name string) (*RemoteRepo, error) {
collection.loadList()
for _, r := range collection.list {
if r.Name == name {
return r, nil
}
}
return nil, fmt.Errorf("mirror with name %s not found", name)
}
// ByUUID looks up repository by uuid
func (collection *RemoteRepoCollection) ByUUID(uuid string) (*RemoteRepo, error) {
collection.loadList()
for _, r := range collection.list {
if r.UUID == uuid {
return r, nil
}
}
return nil, fmt.Errorf("mirror with uuid %s not found", uuid)
}
// ForEach runs method for each repository
func (collection *RemoteRepoCollection) ForEach(handler func(*RemoteRepo) error) error {
return collection.db.ProcessByPrefix([]byte("R"), func(key, blob []byte) error {
r := &RemoteRepo{}
if err := r.Decode(blob); err != nil {
log.Printf("Error decoding mirror: %s\n", err)
return nil
}
return handler(r)
})
}
// Len returns number of remote repos
func (collection *RemoteRepoCollection) Len() int {
collection.loadList()
return len(collection.list)
}
// Drop removes remote repo from collection
func (collection *RemoteRepoCollection) Drop(repo *RemoteRepo) error {
collection.loadList()
repoPosition := -1
for i, r := range collection.list {
if r == repo {
repoPosition = i
break
}
}
if repoPosition == -1 {
panic("repo not found!")
}
collection.list[len(collection.list)-1], collection.list[repoPosition], collection.list =
nil, collection.list[len(collection.list)-1], collection.list[:len(collection.list)-1]
err := collection.db.Delete(repo.Key())
if err != nil {
return err
}
return collection.db.Delete(repo.RefKey())
}