Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix malformed CAR panics and excessive memory usage #312

Merged
merged 37 commits into from
Jul 6, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
37 commits
Select commit Hold shift + click to select a range
4257e9c
fix: don't OOM if the header size is too big
Jorropo Jun 16, 2022
6c256c2
fix: do bound check while checking for CIDv0
Jorropo Jun 16, 2022
3143968
test: add fuzzing of NewCarReader
Jorropo Jun 16, 2022
c133105
fix: v2 don't OOM if the header size is too big
Jorropo Jun 16, 2022
e36135f
test: v2 add fuzzing to BlockReader
Jorropo Jun 16, 2022
8004dff
fix: v2 don't accept overflowing offsets while reading v2 headers
Jorropo Jun 16, 2022
f5b91b9
test: v2 add fuzzing to Reader
Jorropo Jun 16, 2022
6dc2ea1
fix: v2 don't allocate indexes too big
Jorropo Jun 16, 2022
67ff54f
fix: v2 don't divide by zero in width indexes
Jorropo Jun 17, 2022
4e24d90
test: v2 add fuzzing of the index
Jorropo Jun 17, 2022
2eea288
ci: add fuzzing on CI
Jorropo Jun 17, 2022
f8735e6
feat: Refactor indexes to put storage considerations on consumers
Jorropo Jun 17, 2022
6e4e208
fix index comparisons
rvagg Jun 21, 2022
3eabc2d
fix: revert to internalio.NewOffsetReadSeeker in Reader#IndexReader
rvagg Jun 21, 2022
04a85e7
fix: staticcheck catches
rvagg Jun 21, 2022
77de9fe
fix: don't use multiple-go-modules for fuzzing
rvagg Jun 21, 2022
dfbe3f7
fix: use CidFromReader() which has overread and OOM protection
rvagg Jun 21, 2022
3940bf5
feat: MaxAllowedSectionSize default to 32M
rvagg Jun 21, 2022
ec34902
feat: MaxAllowed{Header,Section}Size option
rvagg Jun 21, 2022
3c9f1d2
fix: explicitly disable serialization of insertionindex
rvagg Jun 22, 2022
3a1b4e8
fix: tighter constraint of singleWidthIndex width, add index recommen…
rvagg Jun 23, 2022
8a5b330
Fix testutil assertion logic and update index generation tests
masih Jun 30, 2022
fd7281b
Use a fix code as the multihash code for `CarIndexSorted`
masih Jun 30, 2022
e6d416c
Remove support for `ForEach` enumeration from car-index-sorted
masih Jun 30, 2022
2539ce2
feat: add Reader#Inspect() function to check basic validity of a CAR …
rvagg Jun 30, 2022
708b0a2
feat: add block hash validation to Inspect()
rvagg Jul 1, 2022
a36603e
test: add fuzzing for reader#Inspect
Jorropo Jul 1, 2022
965f1f3
Use streaming APIs to verify the hash of blocks in CAR `Inspect`
masih Jul 1, 2022
a274e75
Use consistent CID mismatch error in `Inspect` and `BlockReader.Next`
masih Jul 2, 2022
641c0f8
Benchmark `Reader.Inspect` with and without hash validation
masih Jul 2, 2022
8696a19
Drop repeated package name from `CarStats`
masih Jul 3, 2022
bed1297
Return error when section length is invalid `varint`
masih Jul 4, 2022
a41506a
Fix fuzz CI job
masih Jul 4, 2022
a971f7c
Revert changes to `index.Index` while keeping most of security fixes
masih Jul 4, 2022
dec4ca1
Revert changes to `insertionindex`
masih Jul 6, 2022
80bb0d5
ci: remove the reverted FuzzIndex fuzzer
Jorropo Jul 6, 2022
d68cd32
Bump version in prep for releasing go-car `v0`
masih Jul 6, 2022
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 46 additions & 0 deletions .github/workflows/go-fuzz.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
on: [ push, pull_request ]
name: Go Fuzz

jobs:
v1:
strategy:
fail-fast: true
matrix:
target: [ "CarReader" ]
runs-on: ubuntu-latest
name: Fuzz V1 ${{ matrix.target }}
steps:
- uses: actions/checkout@v2
with:
submodules: recursive
- uses: actions/setup-go@v2
with:
go-version: 1.18.x
- name: Go information
run: |
go version
go env
- name: Run Fuzzing for 1m
run: go test -v -fuzz=Fuzz${{ matrix.target }} -fuzztime=1m .
v2:
strategy:
fail-fast: true
matrix:
target: [ "BlockReader", "Reader", "Inspect" ]
runs-on: ubuntu-latest
name: Fuzz V2 ${{ matrix.target }}
steps:
- uses: actions/checkout@v2
with:
submodules: recursive
- uses: actions/setup-go@v2
with:
go-version: 1.18.x
- name: Go information
run: |
go version
go env
- name: Run Fuzzing for 1m
run: |
cd v2
go test -v -fuzz=Fuzz${{ matrix.target }} -fuzztime=1m .
6 changes: 4 additions & 2 deletions car_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -75,9 +75,11 @@ func TestRoundtrip(t *testing.T) {
}
}

// fixture is a clean single-block, single-root CAR
const fixtureStr = "3aa265726f6f747381d82a58250001711220151fe9e73c6267a7060c6f6c4cca943c236f4b196723489608edb42a8b8fa80b6776657273696f6e012c01711220151fe9e73c6267a7060c6f6c4cca943c236f4b196723489608edb42a8b8fa80ba165646f646779f5"

func TestEOFHandling(t *testing.T) {
// fixture is a clean single-block, single-root CAR
fixture, err := hex.DecodeString("3aa265726f6f747381d82a58250001711220151fe9e73c6267a7060c6f6c4cca943c236f4b196723489608edb42a8b8fa80b6776657273696f6e012c01711220151fe9e73c6267a7060c6f6c4cca943c236f4b196723489608edb42a8b8fa80ba165646f646779f5")
fixture, err := hex.DecodeString(fixtureStr)
if err != nil {
t.Fatal(err)
}
Expand Down
35 changes: 35 additions & 0 deletions fuzz_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
//go:build go1.18
// +build go1.18

package car_test

import (
"bytes"
"encoding/hex"
"io"
"testing"

car "github.com/ipld/go-car"
)

func FuzzCarReader(f *testing.F) {
fixture, err := hex.DecodeString(fixtureStr)
if err != nil {
f.Fatal(err)
}
f.Add(fixture)

f.Fuzz(func(t *testing.T, data []byte) {
r, err := car.NewCarReader(bytes.NewReader(data))
if err != nil {
return
}

for {
_, err = r.Next()
if err == io.EOF {
return
}
}
})
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
go test fuzz v1
[]byte("\xe0\xe0\xe0\xe0\xa7\x06\folLʔ<#oK\x19g#H\x96\b\xed\xb4*\x8b\x8f\xa8\vgversion\x19")
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
go test fuzz v1
[]byte(":\xa2eroots\x81\xd80X%\x00\x0100 00000000000000000000000000000000gversion\x01\x010")
25 changes: 20 additions & 5 deletions util/util.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,25 +4,36 @@ import (
"bufio"
"bytes"
"encoding/binary"
"errors"
"fmt"
"io"

cid "github.com/ipfs/go-cid"
mh "github.com/multiformats/go-multihash"
)

// MaxAllowedSectionSize dictates the maximum number of bytes that a CARv1 header
// or section is allowed to occupy without causing a decode to error.
// This cannot be supplied as an option, only adjusted as a global. You should
// use v2#NewReader instead since it allows for options to be passed in.
var MaxAllowedSectionSize uint = 32 << 20 // 32MiB

var cidv0Pref = []byte{0x12, 0x20}

type BytesReader interface {
io.Reader
io.ByteReader
}

// TODO: this belongs in the go-cid package
// Deprecated: ReadCid shouldn't be used directly, use CidFromReader from go-cid
func ReadCid(buf []byte) (cid.Cid, int, error) {
if bytes.Equal(buf[:2], cidv0Pref) {
c, err := cid.Cast(buf[:34])
return c, 34, err
if len(buf) >= 2 && bytes.Equal(buf[:2], cidv0Pref) {
i := 34
if len(buf) < i {
i = len(buf)
}
c, err := cid.Cast(buf[:i])
return c, i, err
}

br := bytes.NewReader(buf)
Expand Down Expand Up @@ -58,7 +69,7 @@ func ReadNode(br *bufio.Reader) (cid.Cid, []byte, error) {
return cid.Cid{}, nil, err
}

c, n, err := ReadCid(data)
n, c, err := cid.CidFromReader(bytes.NewReader(data))
if err != nil {
return cid.Cid{}, nil, err
}
Expand Down Expand Up @@ -112,6 +123,10 @@ func LdRead(r *bufio.Reader) ([]byte, error) {
return nil, err
}

if l > uint64(MaxAllowedSectionSize) { // Don't OOM
return nil, errors.New("malformed car; header is bigger than util.MaxAllowedSectionSize")
}

buf := make([]byte, l)
if _, err := io.ReadFull(r, buf); err != nil {
return nil, err
Expand Down
57 changes: 56 additions & 1 deletion v2/bench_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,11 @@ func BenchmarkExtractV1UsingReader(b *testing.B) {
if err != nil {
b.Fatal(err)
}
_, err = io.Copy(dst, reader.DataReader())
dr, err := reader.DataReader()
if err != nil {
b.Fatal(err)
}
_, err = io.Copy(dst, dr)
if err != nil {
b.Fatal(err)
}
Expand All @@ -119,6 +123,57 @@ func BenchmarkExtractV1UsingReader(b *testing.B) {
})
}

// BenchmarkReader_InspectWithBlockValidation benchmarks Reader.Inspect with block hash validation
// for a randomly generated CARv2 file of size 10 MiB.
func BenchmarkReader_InspectWithBlockValidation(b *testing.B) {
path := filepath.Join(b.TempDir(), "bench-large-v2.car")
generateRandomCarV2File(b, path, 10<<20) // 10 MiB
defer os.Remove(path)

info, err := os.Stat(path)
if err != nil {
b.Fatal(err)
}
b.SetBytes(info.Size())
b.ReportAllocs()
b.ResetTimer()
b.RunParallel(func(pb *testing.PB) {
for pb.Next() {
benchmarkInspect(b, path, true)
}
})
}

// BenchmarkReader_InspectWithoutBlockValidation benchmarks Reader.Inspect without block hash
// validation for a randomly generated CARv2 file of size 10 MiB.
func BenchmarkReader_InspectWithoutBlockValidation(b *testing.B) {
path := filepath.Join(b.TempDir(), "bench-large-v2.car")
generateRandomCarV2File(b, path, 10<<20) // 10 MiB
defer os.Remove(path)

info, err := os.Stat(path)
if err != nil {
b.Fatal(err)
}
b.SetBytes(info.Size())
b.ReportAllocs()
b.ResetTimer()
b.RunParallel(func(pb *testing.PB) {
for pb.Next() {
benchmarkInspect(b, path, false)
}
})
}

func benchmarkInspect(b *testing.B, path string, validateBlockHash bool) {
reader, err := carv2.OpenReader(path)
if err != nil {
b.Fatal(err)
}
if _, err := reader.Inspect(validateBlockHash); err != nil {
b.Fatal(err)
}
}
func generateRandomCarV2File(b *testing.B, path string, minTotalBlockSize int) {
// Use fixed RNG for determinism across benchmarks.
rng := rand.New(rand.NewSource(1413))
Expand Down
30 changes: 9 additions & 21 deletions v2/block_reader.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,17 +30,19 @@ type BlockReader struct {
//
// See BlockReader.Next
func NewBlockReader(r io.Reader, opts ...Option) (*BlockReader, error) {
options := ApplyOptions(opts...)

// Read CARv1 header or CARv2 pragma.
// Both are a valid CARv1 header, therefore are read as such.
pragmaOrV1Header, err := carv1.ReadHeader(r)
pragmaOrV1Header, err := carv1.ReadHeader(r, options.MaxAllowedHeaderSize)
if err != nil {
return nil, err
}

// Populate the block reader version and options.
br := &BlockReader{
Version: pragmaOrV1Header.Version,
opts: ApplyOptions(opts...),
opts: options,
}

// Expect either version 1 or 2.
Expand All @@ -62,20 +64,6 @@ func NewBlockReader(r io.Reader, opts ...Option) (*BlockReader, error) {
if _, err := v2h.ReadFrom(r); err != nil {
return nil, err
}
// Assert the data payload offset validity.
// It must be at least 51 (<CARv2Pragma> + <CARv2Header>).
dataOffset := int64(v2h.DataOffset)
if dataOffset < PragmaSize+HeaderSize {
return nil, fmt.Errorf("invalid data payload offset: %v", dataOffset)
}
// Assert the data size validity.
// It must be larger than zero.
// Technically, it should be at least 11 bytes (i.e. a valid CARv1 header with no roots) but
// we let further parsing of the header to signal invalid data payload header.
dataSize := int64(v2h.DataSize)
if dataSize <= 0 {
return nil, fmt.Errorf("invalid data payload size: %v", dataSize)
}

// Skip to the beginning of inner CARv1 data payload.
// Note, at this point the pragma and CARv1 header have been read.
Expand All @@ -84,15 +72,15 @@ func NewBlockReader(r io.Reader, opts ...Option) (*BlockReader, error) {
// fast forward to the beginning of data payload by subtracting pragma and header size from
// dataOffset.
rs := internalio.ToByteReadSeeker(r)
if _, err := rs.Seek(dataOffset-PragmaSize-HeaderSize, io.SeekCurrent); err != nil {
if _, err := rs.Seek(int64(v2h.DataOffset)-PragmaSize-HeaderSize, io.SeekCurrent); err != nil {
return nil, err
}

// Set br.r to a LimitReader reading from r limited to dataSize.
br.r = io.LimitReader(r, dataSize)
br.r = io.LimitReader(r, int64(v2h.DataSize))

// Populate br.Roots by reading the inner CARv1 data payload header.
header, err := carv1.ReadHeader(br.r)
header, err := carv1.ReadHeader(br.r, options.MaxAllowedHeaderSize)
if err != nil {
return nil, err
}
Expand Down Expand Up @@ -120,7 +108,7 @@ func NewBlockReader(r io.Reader, opts ...Option) (*BlockReader, error) {
// immediately upon encountering a zero-length section without reading any further bytes from the
// underlying io.Reader.
func (br *BlockReader) Next() (blocks.Block, error) {
c, data, err := util.ReadNode(br.r, br.opts.ZeroLengthSectionAsEOF)
c, data, err := util.ReadNode(br.r, br.opts.ZeroLengthSectionAsEOF, br.opts.MaxAllowedSectionSize)
if err != nil {
return nil, err
}
Expand All @@ -131,7 +119,7 @@ func (br *BlockReader) Next() (blocks.Block, error) {
}

if !hashed.Equals(c) {
return nil, fmt.Errorf("mismatch in content integrity, name: %s, data: %s", c, hashed)
return nil, fmt.Errorf("mismatch in content integrity, expected: %s, got: %s", c, hashed)
}

return blocks.NewBlockWithCid(data, c)
Expand Down
55 changes: 55 additions & 0 deletions v2/block_reader_test.go
Original file line number Diff line number Diff line change
@@ -1,12 +1,17 @@
package car_test

import (
"bytes"
"encoding/hex"
"io"
"os"
"testing"

"github.com/ipfs/go-cid"
carv2 "github.com/ipld/go-car/v2"
"github.com/ipld/go-car/v2/internal/carv1"
mh "github.com/multiformats/go-multihash"
"github.com/multiformats/go-varint"
"github.com/stretchr/testify/require"
)

Expand Down Expand Up @@ -104,6 +109,56 @@ func TestBlockReader_WithCarV1Consistency(t *testing.T) {
}
}

func TestMaxSectionLength(t *testing.T) {
// headerHex is the zero-roots CARv1 header
const headerHex = "11a265726f6f7473806776657273696f6e01"
headerBytes, _ := hex.DecodeString(headerHex)
// 8 MiB block of zeros
block := make([]byte, 8<<20)
// CID for that block
pfx := cid.NewPrefixV1(cid.Raw, mh.SHA2_256)
cid, err := pfx.Sum(block)
require.NoError(t, err)

// construct CAR
var buf bytes.Buffer
buf.Write(headerBytes)
buf.Write(varint.ToUvarint(uint64(len(cid.Bytes()) + len(block))))
buf.Write(cid.Bytes())
buf.Write(block)

// try to read it
car, err := carv2.NewBlockReader(bytes.NewReader(buf.Bytes()))
require.NoError(t, err)
// error should occur on first section read
_, err = car.Next()
require.EqualError(t, err, "invalid section data, length of read beyond allowable maximum")

// successful read by expanding the max section size
car, err = carv2.NewBlockReader(bytes.NewReader(buf.Bytes()), carv2.MaxAllowedSectionSize((8<<20)+40))
require.NoError(t, err)
// can now read block and get our 8 MiB zeroed byte array
readBlock, err := car.Next()
require.NoError(t, err)
require.True(t, bytes.Equal(block, readBlock.RawData()))
}

func TestMaxHeaderLength(t *testing.T) {
// headerHex is the is a 5 root CARv1 header
const headerHex = "de01a265726f6f747385d82a58250001711220785197229dc8bb1152945da58e2348f7e279eeded06cc2ca736d0e879858b501d82a58250001711220785197229dc8bb1152945da58e2348f7e279eeded06cc2ca736d0e879858b501d82a58250001711220785197229dc8bb1152945da58e2348f7e279eeded06cc2ca736d0e879858b501d82a58250001711220785197229dc8bb1152945da58e2348f7e279eeded06cc2ca736d0e879858b501d82a58250001711220785197229dc8bb1152945da58e2348f7e279eeded06cc2ca736d0e879858b5016776657273696f6e01"
headerBytes, _ := hex.DecodeString(headerHex)
c, _ := cid.Decode("bafyreidykglsfhoixmivffc5uwhcgshx4j465xwqntbmu43nb2dzqwfvae")

// successful read
car, err := carv2.NewBlockReader(bytes.NewReader(headerBytes))
require.NoError(t, err)
require.ElementsMatch(t, []cid.Cid{c, c, c, c, c}, car.Roots)

// unsuccessful read, low allowable max header length (length - 3 because there are 2 bytes in the length varint prefix)
_, err = carv2.NewBlockReader(bytes.NewReader(headerBytes), carv2.MaxAllowedHeaderSize(uint64(len(headerBytes)-3)))
require.EqualError(t, err, "invalid header data, length of read beyond allowable maximum")
}

func requireReaderFromPath(t *testing.T, path string) io.Reader {
f, err := os.Open(path)
require.NoError(t, err)
Expand Down
Loading