support both filesize and rows arguments #177

Merged (10 commits) on Nov 9, 2020
4 changes: 2 additions & 2 deletions tests/consistency/run.sh
@@ -34,7 +34,7 @@ run_sql "insert into $DB_NAME.$TABLE_NAME values $(seq -s, 100 | sed 's/,*$//g'
wait

# check data record count
-cnt=`grep -o "(1)" ${DUMPLING_OUTPUT_DIR}/${DB_NAME}.${TABLE_NAME}.0.sql|wc -l`
+cnt=`grep -o "(1)" ${DUMPLING_OUTPUT_DIR}/${DB_NAME}.${TABLE_NAME}.000000000.sql|wc -l`
echo "1st records count is ${cnt}"
[ $cnt = 100 ]

@@ -55,6 +55,6 @@ fi
# test dumpling normally
export GO_FAILPOINTS=""
run_dumpling
-cnt=`grep -o "(1)" ${DUMPLING_OUTPUT_DIR}/${DB_NAME}.${TABLE_NAME}.0.sql|wc -l`
+cnt=`grep -o "(1)" ${DUMPLING_OUTPUT_DIR}/${DB_NAME}.${TABLE_NAME}.000000000.sql|wc -l`
echo "2nd records count is ${cnt}"
[ $cnt = 200 ]
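
Every test change in this PR tracks the same rename: chunk indexes in data file names are now zero-padded to nine digits, so plain filesystem (lexicographic) ordering agrees with numeric ordering. A minimal sketch of why the padding matters, ours rather than the PR's:

// pad.go: with bare %d, "t.10.sql" sorts between "t.1.sql" and "t.2.sql";
// zero-padding makes string order and numeric order coincide.
package main

import (
	"fmt"
	"sort"
)

func main() {
	plain := []string{"t.2.sql", "t.10.sql", "t.1.sql"}
	padded := []string{"t.000000002.sql", "t.000000010.sql", "t.000000001.sql"}
	sort.Strings(plain)
	sort.Strings(padded)
	fmt.Println(plain)  // [t.1.sql t.10.sql t.2.sql] -- 10 lands before 2
	fmt.Println(padded) // [t.000000001.sql t.000000002.sql t.000000010.sql]
}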
4 changes: 2 additions & 2 deletions tests/naughty_strings/run.sh
@@ -9,9 +9,9 @@ run_sql_file "$DUMPLING_BASE_NAME/data/naughty_strings.t-schema.sql"
run_sql_file "$DUMPLING_BASE_NAME/data/naughty_strings.t.sql"
run_dumpling --escape-backslash=false
# FIXME should compare the schemas too, but they differ too much among MySQL versions.
diff "$DUMPLING_BASE_NAME/expect/naughty_strings.t.sql" "$DUMPLING_OUTPUT_DIR/naughty_strings.t.0.sql"
diff "$DUMPLING_BASE_NAME/expect/naughty_strings.t.sql" "$DUMPLING_OUTPUT_DIR/naughty_strings.t.000000000.sql"
run_sql_file "$DUMPLING_BASE_NAME/data/naughty_strings.escape-schema.sql"
run_sql_file "$DUMPLING_BASE_NAME/data/naughty_strings.escape.sql"
run_dumpling --escape-backslash=true
# FIXME should compare the schemas too, but they differ too much among MySQL versions.
diff "$DUMPLING_BASE_NAME/expect/naughty_strings.escape.sql" "$DUMPLING_OUTPUT_DIR/naughty_strings.escape.0.sql"
diff "$DUMPLING_BASE_NAME/expect/naughty_strings.escape.sql" "$DUMPLING_OUTPUT_DIR/naughty_strings.escape.000000000.sql"
2 changes: 1 addition & 1 deletion tests/null_unique_index/run.sh
@@ -19,6 +19,6 @@ export DUMPLING_TEST_DATABASE=$DB_NAME
run_dumpling -r 1

data="NULL"
-cnt=$(sed "s/$data/$data\n/g" $DUMPLING_OUTPUT_DIR/$DB_NAME.t.1.sql | grep -c "$data") || true
+cnt=$(sed "s/$data/$data\n/g" $DUMPLING_OUTPUT_DIR/$DB_NAME.t.000000001.sql | grep -c "$data") || true
[ $cnt = 1 ]

4 changes: 2 additions & 2 deletions tests/primary_key/run.sh
@@ -16,6 +16,6 @@ for file_path in "$DUMPLING_BASE_NAME"/data/*; do
base_name=$(basename "$file_path")
table_name="${base_name%.sql}"
file_should_exist "$DUMPLING_BASE_NAME/result/$table_name.sql"
-file_should_exist "$DUMPLING_OUTPUT_DIR/primary_key.$table_name.0.sql"
-diff "$DUMPLING_BASE_NAME/result/$table_name.sql" "$DUMPLING_OUTPUT_DIR/primary_key.$table_name.0.sql"
+file_should_exist "$DUMPLING_OUTPUT_DIR/primary_key.$table_name.000000000.sql"
+diff "$DUMPLING_BASE_NAME/result/$table_name.sql" "$DUMPLING_OUTPUT_DIR/primary_key.$table_name.000000000.sql"
done
4 changes: 2 additions & 2 deletions tests/quote/run.sh
@@ -3,7 +3,7 @@
set -eu

mkdir -p "$DUMPLING_OUTPUT_DIR"/data
cp "$DUMPLING_BASE_NAME/data/quote-database.quote-table.0.sql" "$DUMPLING_OUTPUT_DIR/data/quo\`te%2Fdatabase.quo\`te%2Ftable.0.sql"
cp "$DUMPLING_BASE_NAME/data/quote-database.quote-table.000000000.sql" "$DUMPLING_OUTPUT_DIR/data/quo\`te%2Fdatabase.quo\`te%2Ftable.000000000.sql"
cp "$DUMPLING_BASE_NAME/data/quote-database.quote-table-schema.sql" "$DUMPLING_OUTPUT_DIR/data/quo\`te%2Fdatabase.quo\`te%2Ftable-schema.sql"
cp "$DUMPLING_BASE_NAME/data/quote-database-schema-create.sql" "$DUMPLING_OUTPUT_DIR/data/quo\`te%2Fdatabase-schema-create.sql"

@@ -13,7 +13,7 @@ run_sql_file "$DUMPLING_OUTPUT_DIR/data/quo\`te%2Fdatabase-schema-create.sql"
export DUMPLING_TEST_DATABASE=$db

run_sql_file "$DUMPLING_OUTPUT_DIR/data/quo\`te%2Fdatabase.quo\`te%2Ftable-schema.sql"
-run_sql_file "$DUMPLING_OUTPUT_DIR/data/quo\`te%2Fdatabase.quo\`te%2Ftable.0.sql"
+run_sql_file "$DUMPLING_OUTPUT_DIR/data/quo\`te%2Fdatabase.quo\`te%2Ftable.000000000.sql"

run_dumpling

45 changes: 39 additions & 6 deletions tests/rows/run.sh
@@ -19,19 +19,21 @@ run_sql "create database $DB_NAME;"
run_sql "create table $DB_NAME.$TABLE_NAME (id int not null auto_increment primary key, a varchar(24));"

# insert 100 records
-run_sql_file "$cur/data/rows.t.0.sql"
+run_sql_file "$cur/data/rows.t.000000000.sql"

+# make sure the estimated count is accurate
+run_sql "analyze table $DB_NAME.$TABLE_NAME"

# dumping
export DUMPLING_TEST_DATABASE=$DB_NAME
run_dumpling --rows 10 --loglevel debug

# the dumping result is expected to be:
# 10 files for insertion
-# FIXME the result of EXPLAIN SELECT `id` FROM `rows`.`t` randomly equal to 1 or 100, this could affect on file num.
-# file_num=$(find "$DUMPLING_OUTPUT_DIR" -maxdepth 1 -iname "$DB_NAME.$TABLE_NAME.*.sql" | wc -l)
-# if [ "$file_num" -ne 10 ]; then
-#   echo "obtain file number: $file_num, but expect: 10" && exit 1
-# fi
+file_num=$(find "$DUMPLING_OUTPUT_DIR" -maxdepth 1 -iname "$DB_NAME.$TABLE_NAME.*.sql" | wc -l)
+if [ "$file_num" -ne 10 ]; then
+  echo "obtain file number: $file_num, but expect: 10" && exit 1
+fi

cat "$cur/conf/lightning.toml"
# use lightning import data to tidb
@@ -40,4 +42,35 @@ run_lightning $cur/conf/lightning.toml
# check mysql and tidb data
check_sync_diff $cur/conf/diff_config.toml

+# test dumpling with both rows and filesize
+rm -rf "$DUMPLING_OUTPUT_DIR"
+run_dumpling --rows 10 --filesize 100B --loglevel debug
+# the dumping result is expected to be:
+# 50 files for insertion
+file_num=$(find "$DUMPLING_OUTPUT_DIR" -maxdepth 1 -iname "$DB_NAME.$TABLE_NAME.*.sql" | wc -l)
+if [ "$file_num" -ne 50 ]; then
+  echo "obtain file number: $file_num, but expect: 50" && exit 1
+fi
+
+for i in `seq 1 10`
+do
+  r=$(printf "%02d" $i)
+  for j in `seq 0 4`
+  do
+    file_name="$DUMPLING_OUTPUT_DIR/$DB_NAME.$TABLE_NAME.0000000${r}000${j}.sql"
+    if [ ! -f "$file_name" ]; then
+      echo "file $file_name doesn't exist, which is not expected" && exit 1
+    fi
+  done
+done
+
+# drop database on tidb
+export DUMPLING_TEST_PORT=4000
+run_sql "drop database if exists \`$DB_NAME\`;"
+
+cat "$cur/conf/lightning.toml"
+# use lightning import data to tidb
+run_lightning $cur/conf/lightning.toml
+
+# check mysql and tidb data
+check_sync_diff $cur/conf/diff_config.toml
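
For reference, the arithmetic behind the two checks above: 100 inserted rows with --rows 10 yield 10 chunks, and --filesize 100B splits each chunk into 5 files, giving the 50 expected files. Each name concatenates a 9-digit chunk index with a 4-digit in-chunk counter. A small sketch (ours, not part of the PR) that prints exactly the names the shell loop verifies:

// names.go: reproduce the 13-digit indexes asserted by the loop above.
package main

import "fmt"

func main() {
	for chunk := 1; chunk <= 10; chunk++ { // 100 rows / --rows 10
		for file := 0; file < 5; file++ { // each chunk split by --filesize 100B
			fmt.Printf("rows.t.%09d%04d.sql\n", chunk, file) // e.g. rows.t.0000000010000.sql
		}
	}
}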
4 changes: 2 additions & 2 deletions tests/s3/run.sh
@@ -45,8 +45,8 @@ ls "${HOST_DIR}"

curl -o "${HOST_DIR}/s3-schema-create.sql" http://localhost:5000/mybucket/dump/s3-schema-create.sql
curl -o "${HOST_DIR}/s3.t-schema.sql" http://localhost:5000/mybucket/dump/s3.t-schema.sql
-curl -o "${HOST_DIR}/s3.t.0.sql" http://localhost:5000/mybucket/dump/s3.t.0.sql
+curl -o "${HOST_DIR}/s3.t.000000000.sql" http://localhost:5000/mybucket/dump/s3.t.000000000.sql

file_should_exist "$HOST_DIR/s3-schema-create.sql"
file_should_exist "$HOST_DIR/s3.t-schema.sql"
-file_should_exist "$HOST_DIR/s3.t.0.sql"
+file_should_exist "$HOST_DIR/s3.t.000000000.sql"
2 changes: 1 addition & 1 deletion tests/tls/run.sh
@@ -24,4 +24,4 @@ run_dumpling --ca "$DUMPLING_TEST_DIR/ca.pem" --cert "$DUMPLING_TEST_DIR/dumplin

file_should_exist "$DUMPLING_OUTPUT_DIR/tls-schema-create.sql"
file_should_exist "$DUMPLING_OUTPUT_DIR/tls.t-schema.sql"
-file_should_exist "$DUMPLING_OUTPUT_DIR/tls.t.0.sql"
+file_should_exist "$DUMPLING_OUTPUT_DIR/tls.t.000000000.sql"
2 changes: 1 addition & 1 deletion v4/export/config.go
@@ -94,7 +94,7 @@ func DefaultConfig() *Config {
		NoViews:   true,
		Rows:      UnspecifiedSize,
		Where:     "",
-		FileType:  "SQL",
+		FileType:  "sql",
		NoHeader:  false,
		NoSchemas: false,
		NoData:    false,
4 changes: 3 additions & 1 deletion v4/export/dump.go
@@ -3,13 +3,13 @@ package export
import (
"context"
"database/sql"
"errors"
"strings"
"time"

"github.com/pingcap/dumpling/v4/log"

_ "github.com/go-sql-driver/mysql"
"github.com/pingcap/errors"
"github.com/pingcap/failpoint"
pd "github.com/tikv/pd/client"
"go.uber.org/zap"
@@ -207,6 +207,8 @@ func Dump(pCtx context.Context, conf *Config) (err error) {
		writer = SQLWriter{SimpleWriter: simpleWriter}
	case "csv":
		writer = CSVWriter{SimpleWriter: simpleWriter}
+	default:
+		return errors.Errorf("unsupported filetype %s", conf.FileType)
	}

	if conf.Sql == "" {
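
The import swap and the new default branch belong together: with github.com/pingcap/errors, the Errorf rejection of an unknown --filetype carries a stack trace, and Dump now fails fast instead of continuing with no writer selected. A standalone sketch of the same shape; pickWriter is our stand-in for the switch in Dump, not a real dumpling function:

// filetype.go: hedged sketch of the writer-selection validation.
package main

import (
	"fmt"

	"github.com/pingcap/errors"
)

func pickWriter(fileType string) (string, error) {
	switch fileType {
	case "sql":
		return "SQLWriter", nil
	case "csv":
		return "CSVWriter", nil
	default:
		// errors.Errorf attaches a stack trace, unlike the stdlib version.
		return "", errors.Errorf("unsupported filetype %s", fileType)
	}
}

func main() {
	for _, ft := range []string{"sql", "csv", "parquet"} {
		w, err := pickWriter(ft)
		fmt.Println(ft, w, err)
	}
}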
4 changes: 0 additions & 4 deletions v4/export/prepare.go
@@ -99,10 +99,6 @@ func adjustConfig(ctx context.Context, conf *Config) error {
}
}

-	if conf.Rows != UnspecifiedSize {
-		// Disable filesize if rows was set
-		conf.FileSize = UnspecifiedSize
-	}
	if conf.SessionParams == nil {
		conf.SessionParams = make(map[string]interface{})
	}
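
This deletion is the core of the PR: adjustConfig used to discard --filesize whenever --rows was set, making the two flags mutually exclusive. With the guard gone, both survive into the writers, which emit at most --rows rows per chunk and rotate to a new numbered file whenever the current one exceeds --filesize. Sketched before/after, with surrounding code elided:

// Before: --rows silently won and --filesize was thrown away.
if conf.Rows != UnspecifiedSize {
	conf.FileSize = UnspecifiedSize // filesize disabled
}

// After: no such adjustment; each rows-bounded chunk is additionally
// split by the writer once it outgrows conf.FileSize.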
4 changes: 2 additions & 2 deletions v4/export/prepare_test.go
@@ -119,7 +119,7 @@ func (s *testPrepareSuite) TestAdjustConfig(c *C) {
	c.Assert(adjustConfig(nil, conf), IsNil)
	conf.Sql = ""
	conf.Rows = 5000
-	conf.FileSize = uint64(5000)
+	conf.FileSize = 5000
	c.Assert(adjustConfig(nil, conf), IsNil)
-	c.Assert(conf.FileSize, Equals, uint64(UnspecifiedSize))
+	c.Assert(conf.FileSize, Equals, uint64(5000))
}
45 changes: 28 additions & 17 deletions v4/export/writer.go
@@ -3,6 +3,8 @@ package export
import (
"bytes"
"context"
"fmt"
"strings"
"text/template"

"github.com/pingcap/br/pkg/storage"
@@ -72,12 +74,12 @@ func (f SQLWriter) WriteTableData(ctx context.Context, ir TableDataIR) error {
fileName = fmt.Sprintf("%s.%s.%d.sql", ir.DatabaseName(), ir.TableName(), 0)
}
}*/
-	namer := newOutputFileNamer(ir)
-	fileName, err := namer.NextName(f.cfg.OutputFileTemplate)
+	namer := newOutputFileNamer(ir, f.cfg.Rows != UnspecifiedSize && f.cfg.FileSize != UnspecifiedSize)
+	fileType := strings.ToLower(f.cfg.FileType)
+	fileName, err := namer.NextName(f.cfg.OutputFileTemplate, fileType)
	if err != nil {
		return err
	}
-	fileName += ".sql"
	chunksIter := ir
	defer chunksIter.Rows().Close()

@@ -96,11 +98,10 @@ func (f SQLWriter) WriteTableData(ctx context.Context, ir TableDataIR) error {
		if f.cfg.FileSize == UnspecifiedSize {
			break
		}
-		fileName, err = namer.NextName(f.cfg.OutputFileTemplate)
+		fileName, err = namer.NextName(f.cfg.OutputFileTemplate, fileType)
		if err != nil {
			return err
		}
-		fileName += ".sql"
	}
	log.Debug("dumping table successfully",
		zap.String("table", ir.TableName()))
@@ -126,9 +127,12 @@ func writeMetaToFile(ctx context.Context, target, metaSQL string, s storage.Exte
type CSVWriter struct{ SimpleWriter }

type outputFileNamer struct {
-	Index int
+	Index string
	DB    string
	Table string
+
+	id       int
+	idFormat string
}

type csvOption struct {
@@ -137,12 +141,19 @@
	delimiter []byte
}

-func newOutputFileNamer(ir TableDataIR) *outputFileNamer {
-	return &outputFileNamer{
-		Index: ir.ChunkIndex(),
+func newOutputFileNamer(ir TableDataIR, bothEnabled bool) *outputFileNamer {
+	o := &outputFileNamer{
		DB:    ir.DatabaseName(),
		Table: ir.TableName(),
	}
+	if bothEnabled {
+		o.id = 0
+		o.idFormat = fmt.Sprintf("%09d", ir.ChunkIndex()) + "%04d"

[review comment] 13 digits will overflow an int32. Do tools that read these files parse out the number, or just rely on it for filesystem ordering?

[reply from a collaborator] so far all of myloader, Lightning and DM only rely on filesystem order, and won't parse the number. Lightning is the only one requiring that part to be [0-9]+ because the regex was written too tightly.

+	} else {
+		o.id = ir.ChunkIndex()
+		o.idFormat = "%09d"
+	}
+	return o
}

func (namer *outputFileNamer) render(tmpl *template.Template, subName string) (string, error) {
@@ -153,21 +164,22 @@
	return bf.String(), nil
}

-func (namer *outputFileNamer) NextName(tmpl *template.Template) (string, error) {
+func (namer *outputFileNamer) NextName(tmpl *template.Template, fileType string) (string, error) {
+	namer.Index = fmt.Sprintf(namer.idFormat, namer.id)
	res, err := namer.render(tmpl, outputFileTemplateData)
-	namer.Index++
-	return res, err
+	namer.id++
+	return res + "." + fileType, err
}

func (f CSVWriter) WriteTableData(ctx context.Context, ir TableDataIR) error {
log.Debug("start dumping table in csv format...", zap.String("table", ir.TableName()))

namer := newOutputFileNamer(ir)
fileName, err := namer.NextName(f.cfg.OutputFileTemplate)
namer := newOutputFileNamer(ir, f.cfg.Rows != UnspecifiedSize && f.cfg.FileSize != UnspecifiedSize)
fileType := strings.ToLower(f.cfg.FileType)
fileName, err := namer.NextName(f.cfg.OutputFileTemplate, fileType)
if err != nil {
return err
}
fileName += ".csv"
chunksIter := ir
defer chunksIter.Rows().Close()

@@ -192,11 +204,10 @@ func (f CSVWriter) WriteTableData(ctx context.Context, ir TableDataIR) error {
		if f.cfg.FileSize == UnspecifiedSize {
			break
		}
-		fileName, err = namer.NextName(f.cfg.OutputFileTemplate)
+		fileName, err = namer.NextName(f.cfg.OutputFileTemplate, fileType)
		if err != nil {
			return err
		}
-		fileName += ".csv"
	}
	log.Debug("dumping table in csv format successfully",
		zap.String("table", ir.TableName()))
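
Taken together, the writer changes turn Index from an int into a pre-formatted string: with only one of --rows/--filesize active it is the nine-digit chunk index, and with both active it is that chunk index with a four-digit per-chunk counter appended, 13 digits in total (hence the reviewer's int32 remark; the value is only ever compared as a string). A compact model of NextName under both modes, with the namer simplified and the default template assumed to be "{{.DB}}.{{.Table}}.{{.Index}}":

// namer.go: simplified model of outputFileNamer.NextName.
package main

import "fmt"

type namer struct {
	db, table, idFormat string
	id                  int
}

func (n *namer) nextName(fileType string) string {
	index := fmt.Sprintf(n.idFormat, n.id) // Index stays a string, never parsed back
	n.id++
	return fmt.Sprintf("%s.%s.%s.%s", n.db, n.table, index, fileType)
}

func main() {
	rowsOnly := &namer{db: "test", table: "t", idFormat: "%09d", id: 3}
	fmt.Println(rowsOnly.nextName("sql")) // test.t.000000003.sql

	// both flags set: the 9-digit chunk index is baked into the format
	both := &namer{db: "test", table: "t", idFormat: fmt.Sprintf("%09d", 3) + "%04d"}
	fmt.Println(both.nextName("sql")) // test.t.0000000030000.sql
	fmt.Println(both.nextName("sql")) // test.t.0000000030001.sql
}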
10 changes: 5 additions & 5 deletions v4/export/writer_test.go
@@ -117,7 +117,7 @@ func (s *testDumpSuite) TestWriteTableData(c *C) {
	err = writer.WriteTableData(ctx, tableIR)
	c.Assert(err, IsNil)

-	p := path.Join(dir, "test.employee.0.sql")
+	p := path.Join(dir, "test.employee.000000000.sql")

[review comment] Should there be a test that uses the 13 digit number?

[reply from the PR author] Added in ffcea13. The integration test in rows will also help check this.

	_, err = os.Stat(p)
	c.Assert(err, IsNil)
	bytes, err := ioutil.ReadFile(p)
@@ -167,12 +167,12 @@ func (s *testDumpSuite) TestWriteTableDataWithFileSize(c *C) {
	c.Assert(err, IsNil)

	cases := map[string]string{
-		"test.employee.0.sql": "/*!40101 SET NAMES binary*/;\n" +
+		"test.employee.000000000.sql": "/*!40101 SET NAMES binary*/;\n" +
			"/*!40014 SET FOREIGN_KEY_CHECKS=0*/;\n" +
			"INSERT INTO `employee` VALUES\n" +
			"(1,'male','bob@mail.com','020-1234',NULL),\n" +
			"(2,'female','sarah@mail.com','020-1253','healthy');\n",
-		"test.employee.1.sql": "/*!40101 SET NAMES binary*/;\n" +
+		"test.employee.000000001.sql": "/*!40101 SET NAMES binary*/;\n" +
			"/*!40014 SET FOREIGN_KEY_CHECKS=0*/;\n" +
			"INSERT INTO `employee` VALUES\n" +
			"(3,'male','john@mail.com','020-1256','healthy'),\n" +
@@ -260,14 +260,14 @@ func (s *testDumpSuite) TestWriteTableDataWithStatementSize(c *C) {
	config.ExternalStorage = newStorage

	cases = map[string]string{
-		"0-employee-te%25%2Fst.sql": "/*!40101 SET NAMES binary*/;\n" +
+		"000000000-employee-te%25%2Fst.sql": "/*!40101 SET NAMES binary*/;\n" +
			"/*!40014 SET FOREIGN_KEY_CHECKS=0*/;\n" +
			"INSERT INTO `employee` VALUES\n" +
			"(1,'male','bob@mail.com','020-1234',NULL),\n" +
			"(2,'female','sarah@mail.com','020-1253','healthy');\n" +
			"INSERT INTO `employee` VALUES\n" +
			"(3,'male','john@mail.com','020-1256','healthy');\n",
-		"1-employee-te%25%2Fst.sql": "/*!40101 SET NAMES binary*/;\n" +
+		"000000001-employee-te%25%2Fst.sql": "/*!40101 SET NAMES binary*/;\n" +
			"/*!40014 SET FOREIGN_KEY_CHECKS=0*/;\n" +
			"INSERT INTO `employee` VALUES\n" +
			"(4,'female','sarah@mail.com','020-1235','healthy');\n",
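
The second test case also shows that the padded Index flows through custom --output-filename-template values such as the {{.Index}}-{{.Table}}-... form above. A sketch of how the namer's fields reach text/template; the template string is the test's, the rest is our reconstruction:

// tmpl.go: rendering a file name from outputFileNamer fields.
package main

import (
	"bytes"
	"fmt"
	"text/template"
)

type outputFileNamer struct{ Index, DB, Table string }

func main() {
	tmpl := template.Must(template.New("data").Parse("{{.Index}}-{{.Table}}-te%25%2Fst"))
	n := outputFileNamer{Index: fmt.Sprintf("%09d", 0), DB: "test", Table: "employee"}
	var buf bytes.Buffer
	if err := tmpl.Execute(&buf, n); err != nil {
		panic(err)
	}
	fmt.Println(buf.String() + ".sql") // 000000000-employee-te%25%2Fst.sql
}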