From 2db06f05767dd8475df2f071785d9775144ee549 Mon Sep 17 00:00:00 2001 From: ahrav Date: Wed, 15 May 2024 18:25:36 -0700 Subject: [PATCH] [bug] - Handle empty reader case in newFileReader (#2854) * Correclty handle empty files * fix * fix test --- pkg/handlers/archive.go | 13 ++++++++++-- pkg/handlers/handlers.go | 11 ++++++++++ pkg/handlers/handlers_test.go | 19 ++++++++++++++++++ pkg/handlers/testdata/testdir.zip | Bin 0 -> 946 bytes pkg/readers/bufferedfilereader.go | 3 +++ .../bufferedfilewriter.go | 8 ++------ .../bufferedfilewriter_test.go | 9 ++++++--- 7 files changed, 52 insertions(+), 11 deletions(-) create mode 100644 pkg/handlers/testdata/testdir.zip diff --git a/pkg/handlers/archive.go b/pkg/handlers/archive.go index e0e6313b1f06..61b163322cf7 100644 --- a/pkg/handlers/archive.go +++ b/pkg/handlers/archive.go @@ -21,7 +21,7 @@ const ( var ( maxDepth = 5 - maxSize = 250 * 1024 * 1024 // 250 MB + maxSize = 2 << 30 // 2 GB maxTimeout = time.Duration(30) * time.Second ) @@ -101,6 +101,10 @@ func (h *archiveHandler) openArchive(ctx logContext.Context, depth int, reader f rdr, err := newFileReader(compReader) if err != nil { + if errors.Is(err, ErrEmptyReader) { + ctx.Logger().V(5).Info("empty reader, skipping file") + return nil + } return fmt.Errorf("error creating custom reader: %w", err) } defer rdr.Close() @@ -146,7 +150,8 @@ func (h *archiveHandler) extractorHandler(archiveChan chan []byte) func(context. fileSize := file.Size() if int(fileSize) > maxSize { - lCtx.Logger().V(3).Info("skipping file due to size") + lCtx.Logger().V(3).Info("skipping file due to size", "size", fileSize) + h.metrics.incFilesSkipped() return nil } @@ -180,6 +185,10 @@ func (h *archiveHandler) extractorHandler(archiveChan chan []byte) func(context. rdr, err := newFileReader(f) if err != nil { + if errors.Is(err, ErrEmptyReader) { + lCtx.Logger().V(5).Info("empty reader, skipping file") + return nil + } return fmt.Errorf("error creating custom reader: %w", err) } defer rdr.Close() diff --git a/pkg/handlers/handlers.go b/pkg/handlers/handlers.go index e8efd4b12634..aa810a2f2fd4 100644 --- a/pkg/handlers/handlers.go +++ b/pkg/handlers/handlers.go @@ -35,6 +35,8 @@ type fileReader struct { isGenericArchive bool } +var ErrEmptyReader = errors.New("reader is empty") + func newFileReader(r io.ReadCloser) (fileReader, error) { defer r.Close() @@ -57,6 +59,11 @@ func newFileReader(r io.ReadCloser) (fileReader, error) { } }() + // Check if the reader is empty. + if rdr.Size() == 0 { + return reader, ErrEmptyReader + } + format, arReader, err := archiver.Identify("", rdr) switch { case err == nil: // Archive detected @@ -172,6 +179,10 @@ func HandleFile( rdr, err := newFileReader(reader) if err != nil { + if errors.Is(err, ErrEmptyReader) { + ctx.Logger().V(5).Info("empty reader, skipping file") + return nil + } return fmt.Errorf("error creating custom reader: %w", err) } defer rdr.Close() diff --git a/pkg/handlers/handlers_test.go b/pkg/handlers/handlers_test.go index 3ec232d7bcc7..cfa23ab7c6b0 100644 --- a/pkg/handlers/handlers_test.go +++ b/pkg/handlers/handlers_test.go @@ -215,3 +215,22 @@ func TestHandleFileNonArchive(t *testing.T) { assert.NoError(t, err) assert.Equal(t, wantChunkCount, len(reporter.Ch)) } + +func TestExtractTarContentWithEmptyFile(t *testing.T) { + file, err := os.Open("testdata/testdir.zip") + assert.Nil(t, err) + + chunkCh := make(chan *sources.Chunk) + go func() { + defer close(chunkCh) + err := HandleFile(logContext.Background(), file, &sources.Chunk{}, sources.ChanReporter{Ch: chunkCh}) + assert.NoError(t, err) + }() + + wantCount := 4 + count := 0 + for range chunkCh { + count++ + } + assert.Equal(t, wantCount, count) +} diff --git a/pkg/handlers/testdata/testdir.zip b/pkg/handlers/testdata/testdir.zip new file mode 100644 index 0000000000000000000000000000000000000000..29c11b0b0a34ef122b86d7e2b9a60f294be21819 GIT binary patch literal 946 zcmWIWW@Zs#0DpkSY4+`t z9s9%MZPsn9nk|#J|Jj92SKF0!%f95*?6xVszUuhasYeaN`}6Ne<~}a2zWn>==1;G_ z{SA+>T>T~NyqlL6k0Od`yNfQQ^G%iIlp4fyM;LXYgGJzQg Mg@HlT$pqp70QpE#$p8QV literal 0 HcmV?d00001 diff --git a/pkg/readers/bufferedfilereader.go b/pkg/readers/bufferedfilereader.go index 47bc2f2196e0..44c2bb303e4b 100644 --- a/pkg/readers/bufferedfilereader.go +++ b/pkg/readers/bufferedfilereader.go @@ -79,3 +79,6 @@ func (b *BufferedFileReader) ReadAt(p []byte, off int64) (n int, err error) { } return b.reader.Read(p) } + +// Size returns the total size of the data stored in the BufferedFileReader. +func (b *BufferedFileReader) Size() int { return b.bufWriter.Len() } diff --git a/pkg/writers/buffered_file_writer/bufferedfilewriter.go b/pkg/writers/buffered_file_writer/bufferedfilewriter.go index 16713edda954..3e9a5aa37db4 100644 --- a/pkg/writers/buffered_file_writer/bufferedfilewriter.go +++ b/pkg/writers/buffered_file_writer/bufferedfilewriter.go @@ -84,14 +84,10 @@ func New(opts ...Option) *BufferedFileWriter { // NewFromReader creates a new instance of BufferedFileWriter and writes the content from the provided reader to the writer. func NewFromReader(r io.Reader, opts ...Option) (*BufferedFileWriter, error) { writer := New(opts...) - if _, err := io.Copy(writer, r); err != nil { + if _, err := io.Copy(writer, r); err != nil && !errors.Is(err, io.EOF) { return nil, fmt.Errorf("error writing to buffered file writer: %w", err) } - if writer.buf == nil { - return nil, fmt.Errorf("reader was empty; no data written to buffered file writer") - } - return writer, nil } @@ -271,7 +267,7 @@ func (w *BufferedFileWriter) ReadSeekCloser() (io.ReadSeekCloser, error) { } if w.buf == nil { - return nil, fmt.Errorf("BufferedFileWriter has not buffer data to read") + return nil, nil } // Data is in memory. diff --git a/pkg/writers/buffered_file_writer/bufferedfilewriter_test.go b/pkg/writers/buffered_file_writer/bufferedfilewriter_test.go index e40b587c251c..329bbe4182b5 100644 --- a/pkg/writers/buffered_file_writer/bufferedfilewriter_test.go +++ b/pkg/writers/buffered_file_writer/bufferedfilewriter_test.go @@ -518,9 +518,8 @@ func TestNewFromReader(t *testing.T) { wantData: "hello world", }, { - name: "Empty reader", - reader: strings.NewReader(""), - wantErr: true, + name: "Empty reader", + reader: strings.NewReader(""), }, { name: "Error reader", @@ -550,6 +549,10 @@ func TestNewFromReader(t *testing.T) { return } assert.NoError(t, err) + + if rdr == nil { + return + } defer rdr.Close() _, err = b.ReadFrom(rdr)