Skip to content

Commit

Permalink
Add Download method to ArchiveFetcher for downloading without extracting
Browse files Browse the repository at this point in the history
Signed-off-by: Matheus Pimenta <matheuscscp@gmail.com>
  • Loading branch information
matheuscscp committed Feb 26, 2024
1 parent 739461c commit 068684f
Show file tree
Hide file tree
Showing 2 changed files with 290 additions and 44 deletions.
185 changes: 142 additions & 43 deletions http/fetch/archive_fetcher.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ limitations under the License.
package fetch

import (
"context"
_ "crypto/sha256"
_ "crypto/sha512"
"errors"
Expand All @@ -25,6 +26,7 @@ import (
"net/http"
"net/url"
"os"
"path/filepath"
"strings"
"time"

Expand All @@ -36,65 +38,162 @@ import (
"github.com/fluxcd/pkg/tar"
)

// ArchiveFetcher is a flexible API for downloading an archive from an HTTP server,
// verifying its digest and extracting its contents to a given path in the filesystem.
type ArchiveFetcher struct {
	retries          int    // maximum number of retries the HTTP client is allowed to make
	maxDownloadSize  int    // limit for the size of the downloaded archive (<= 0 disables the check)
	maxUntarSize     int    // limit for the size of the extracted tarball contents
	untar            bool   // whether to extract the downloaded tarball
	hostnameOverride string // optional replacement for the hostname in download URLs
	filename         string // optional file name for storing the downloaded archive
	logger           any    // retryablehttp.Logger, retryablehttp.LeveledLogger or logr.Logger

	// httpClient is the retryable client assembled by New from the fields above.
	httpClient *retryablehttp.Client
}

// Option is a functional option for configuring an ArchiveFetcher
// constructed with New.
type Option func(a *ArchiveFetcher)

// ErrFileNotFound is a sentinel error used to signal 404 HTTP status code
// responses; callers can detect it with errors.Is.
var ErrFileNotFound = errors.New("file not found")

// NewArchiveFetcher configures the retryable HTTP client used for fetching archives.
func NewArchiveFetcher(retries, maxDownloadSize, maxUntarSize int, hostnameOverwrite string) *ArchiveFetcher {
return NewArchiveFetcherWithLogger(retries, maxDownloadSize, maxUntarSize, hostnameOverwrite, nil)
// WithRetries configures how many times the underlying HTTP client is
// allowed to retry a failed request.
func WithRetries(retries int) Option {
	return func(f *ArchiveFetcher) { f.retries = retries }
}

// NewArchiveFetcherWithLogger configures the retryable HTTP client used for
// fetching archives and sets the logger to use.
//
// WithMaxDownloadSize caps the size of the tarball fetched over HTTP.
func WithMaxDownloadSize(maxDownloadSize int) Option {
	return func(f *ArchiveFetcher) { f.maxDownloadSize = maxDownloadSize }
}

// WithMaxUntarSize caps the size of the content extracted from the tarball.
func WithMaxUntarSize(maxUntarSize int) Option {
	return func(f *ArchiveFetcher) { f.maxUntarSize = maxUntarSize }
}

// WithUntar controls whether the ArchiveFetcher extracts the tarball
// after downloading it.
func WithUntar(untar bool) Option {
	return func(f *ArchiveFetcher) { f.untar = untar }
}

// WithHostnameOverride replaces the hostname of download URLs with the
// given value before issuing the request.
func WithHostnameOverride(hostnameOverride string) Option {
	return func(f *ArchiveFetcher) { f.hostnameOverride = hostnameOverride }
}

// WithLogger sets a logger for the HTTP client.
// The logger can be any type that implements the retryablehttp.Logger or
// retryablehttp.LeveledLogger interface. If the logger is of type logr.Logger,
// it will be wrapped in a retryablehttp.LeveledLogger that only logs errors.
func NewArchiveFetcherWithLogger(retries, maxDownloadSize, maxUntarSize int, hostnameOverwrite string, logger any) *ArchiveFetcher {
httpClient := retryablehttp.NewClient()
httpClient.RetryWaitMin = 5 * time.Second
httpClient.RetryWaitMax = 30 * time.Second
httpClient.RetryMax = retries
func WithLogger(logger any) Option {
return func(a *ArchiveFetcher) {
a.logger = logger
}
}

// WithFileName chooses the name under which the downloaded archive is stored.
func WithFileName(filename string) Option {
	return func(f *ArchiveFetcher) { f.filename = filename }
}

switch logger.(type) {
// New creates an *ArchiveFetcher accepting options.
func New(opts ...Option) *ArchiveFetcher {
a := &ArchiveFetcher{
maxUntarSize: tar.UnlimitedUntarSize, // unlimited by default
}
for _, opt := range opts {
opt(a)
}

// Create HTTP client.
a.httpClient = retryablehttp.NewClient()
a.httpClient.RetryWaitMin = 5 * time.Second
a.httpClient.RetryWaitMax = 30 * time.Second
a.httpClient.RetryMax = a.retries
switch a.logger.(type) {
case logr.Logger:
httpClient.Logger = newErrorLogger(logger.(logr.Logger))
a.httpClient.Logger = newErrorLogger(a.logger.(logr.Logger))
default:
httpClient.Logger = logger
a.httpClient.Logger = a.logger
}

return &ArchiveFetcher{
httpClient: httpClient,
maxDownloadSize: maxDownloadSize,
maxUntarSize: maxUntarSize,
hostnameOverwrite: hostnameOverwrite,
}
return a
}

// NewArchiveFetcher configures the retryable HTTP client used for fetching archives.
//
// Deprecated: Use New() instead.
func NewArchiveFetcher(retries, maxDownloadSize, maxUntarSize int, hostnameOverride string) *ArchiveFetcher {
	return New(
		WithRetries(retries),
		WithMaxDownloadSize(maxDownloadSize),
		WithUntar(true),
		WithMaxUntarSize(maxUntarSize),
		WithHostnameOverride(hostnameOverride),
		WithLogger(nil),
	)
}

// NewArchiveFetcherWithLogger configures the retryable HTTP client used for
// fetching archives and sets the logger to use.
//
// The logger can be any type that implements the retryablehttp.Logger or
// retryablehttp.LeveledLogger interface. If the logger is of type logr.Logger,
// it will be wrapped in a retryablehttp.LeveledLogger that only logs errors.
//
// Deprecated: Use New() instead.
func NewArchiveFetcherWithLogger(retries, maxDownloadSize, maxUntarSize int, hostnameOverride string, logger any) *ArchiveFetcher {
	opts := []Option{
		WithRetries(retries),
		WithMaxDownloadSize(maxDownloadSize),
		WithUntar(true),
		WithMaxUntarSize(maxUntarSize),
		WithHostnameOverride(hostnameOverride),
		WithLogger(logger),
	}
	return New(opts...)
}

// Fetch downloads, verifies and extracts the tarball content to the specified directory.
// If the file server responds with 5xx errors, the download operation is retried.
// If the file server responds with 404, the returned error is of type ErrFileNotFound.
// If the file server is unavailable for more than 3 minutes, the returned error contains the original status code.
func (r *ArchiveFetcher) Fetch(archiveURL, digest, dir string) error {
if r.hostnameOverwrite != "" {
return r.FetchWithContext(context.Background(), archiveURL, digest, dir)
}

// FetchWithContext is the same as Fetch but accepts a context.
func (r *ArchiveFetcher) FetchWithContext(ctx context.Context, archiveURL, digest, dir string) error {
var f *os.File
var err error
if r.untar {
f, err = os.CreateTemp("", "fetch.*.tmp")
if err != nil {
return fmt.Errorf("failed to create temp file: %w", err)
}
defer os.Remove(f.Name())
} else {
fn := filepath.Base(archiveURL)
if r.filename != "" {
fn = r.filename
}
path := filepath.Join(dir, fn)
f, err = os.OpenFile(path, os.O_RDWR|os.O_CREATE|os.O_EXCL, 0644)
if err != nil {
return fmt.Errorf("failed to create target file: %w", err)
}
}

if r.hostnameOverride != "" {
u, err := url.Parse(archiveURL)
if err != nil {
return err
}
u.Host = r.hostnameOverwrite
u.Host = r.hostnameOverride
archiveURL = u.String()
}

req, err := retryablehttp.NewRequest(http.MethodGet, archiveURL, nil)
req, err := retryablehttp.NewRequestWithContext(ctx, http.MethodGet, archiveURL, nil)
if err != nil {
return fmt.Errorf("failed to create a new request: %w", err)
}
Expand All @@ -112,12 +211,6 @@ func (r *ArchiveFetcher) Fetch(archiveURL, digest, dir string) error {
return fmt.Errorf("failed to download archive from %s (status: %s)", archiveURL, resp.Status)
}

f, err := os.CreateTemp("", "fetch.*.tmp")
if err != nil {
return fmt.Errorf("failed to create temp file: %w", err)
}
defer os.Remove(f.Name())

// Save temporary file, but limit download to the max download size.
if r.maxDownloadSize > 0 {
// Headers can lie, so instead of trusting resp.ContentLength,
Expand Down Expand Up @@ -150,15 +243,21 @@ func (r *ArchiveFetcher) Fetch(archiveURL, digest, dir string) error {
return fmt.Errorf("failed to verify archive: %w", err)
}

// Jump back at the beginning of the file stream again.
_, err = f.Seek(0, 0)
if err != nil {
return fmt.Errorf("failed to seek back to beginning again: %w", err)
}
if r.untar {
// Jump back at the beginning of the file stream again.
_, err = f.Seek(0, 0)
if err != nil {
return fmt.Errorf("failed to seek back to beginning again: %w", err)
}

// Extracts the tar file.
if err = tar.Untar(f, dir, tar.WithMaxUntarSize(r.maxUntarSize), tar.WithSkipSymlinks()); err != nil {
return fmt.Errorf("failed to extract archive (check whether file size exceeds max download size): %w", err)
// Extracts the tar file.
if err = tar.Untar(f, dir, tar.WithMaxUntarSize(r.maxUntarSize), tar.WithSkipSymlinks()); err != nil {
return fmt.Errorf("failed to extract archive (check whether file size exceeds max download size): %w", err)
}
} else {
if err := f.Close(); err != nil {
return fmt.Errorf("failed to close target file: %w", err)
}
}

return nil
Expand Down
Loading

0 comments on commit 068684f

Please sign in to comment.