Skip to content
This repository has been archived by the owner on Dec 8, 2021. It is now read-only.

mydump,restore: reduce parser memory allocation #108

Merged
merged 3 commits into from
Dec 24, 2018
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 17 additions & 7 deletions lightning/mydump/parser.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package mydump

import (
"bytes"
"io"

"github.com/pkg/errors"
Expand All @@ -12,7 +13,7 @@ type ChunkParser struct {
// states for the lexer
reader io.Reader
buf []byte
bufSize int
blockBuf []byte
isLastChunk bool

lastRow Row
Expand All @@ -24,6 +25,10 @@ type ChunkParser struct {
// The list of columns in the form `(a, b, c)` in the last INSERT statement.
// Assumed to be constant throughout the entire file.
Columns []byte

// cache
remainBuf *bytes.Buffer
appendBuf *bytes.Buffer
}

// Chunk represents a portion of the data file.
Expand All @@ -43,8 +48,10 @@ type Row struct {
// NewChunkParser creates a new parser which can read chunks out of a file.
func NewChunkParser(reader io.Reader) *ChunkParser {
return &ChunkParser{
reader: reader,
bufSize: 8192,
reader: reader,
blockBuf: make([]byte, 8192),
remainBuf: &bytes.Buffer{},
appendBuf: &bytes.Buffer{},
}
}

Expand Down Expand Up @@ -85,15 +92,18 @@ func tryAppendTo(out *[]byte, tail []byte) {
}

func (parser *ChunkParser) readBlock() error {
block := make([]byte, parser.bufSize)

n, err := io.ReadFull(parser.reader, block)
n, err := io.ReadFull(parser.reader, parser.blockBuf)
switch err {
case io.ErrUnexpectedEOF, io.EOF:
parser.isLastChunk = true
fallthrough
case nil:
tryAppendTo(&parser.buf, block[:n])
lonng marked this conversation as resolved.
Show resolved Hide resolved
parser.remainBuf.Reset()
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can we omit remainBuf?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Cannot, because parser.buf references appendBuf.Bytes()

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please leave a comment above the code explaining this.

parser.remainBuf.Write(parser.buf)
parser.appendBuf.Reset()
parser.appendBuf.Write(parser.remainBuf.Bytes())
parser.appendBuf.Write(parser.blockBuf[:n])
parser.buf = parser.appendBuf.Bytes()
return nil
default:
return errors.Trace(err)
Expand Down
3 changes: 2 additions & 1 deletion lightning/restore/restore.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ import (
verify "github.com/pingcap/tidb-lightning/lightning/verification"
tidbcfg "github.com/pingcap/tidb/config"
"github.com/pingcap/tidb/meta/autoid"
"github.com/pingcap/tidb/util/hack"
"github.com/pingcap/tidb/util/kvencoder"
"github.com/pkg/errors"
)
Expand Down Expand Up @@ -1386,7 +1387,7 @@ func (cr *chunkRestore) restore(

// sql -> kv
start = time.Now()
kvs, _, err := kvEncoder.SQL2KV(buffer.String())
kvs, _, err := kvEncoder.SQL2KV(hack.String(buffer.Bytes()))
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We should change the argument type of SQL2KV from string to byte slice. You should not usually use the hack package.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OK

encodeDur := time.Since(start)
encodeTotalDur += encodeDur
metric.BlockEncodeSecondsHistogram.Observe(encodeDur.Seconds())
Expand Down