From b5054a65e9d7190a630a4cddb4d34c381096c67d Mon Sep 17 00:00:00 2001 From: Kenan Yao Date: Tue, 14 May 2019 15:54:07 +0800 Subject: [PATCH 1/5] cmd/importer: support probability in building incremental column --- cmd/importer/README.md | 10 ++++---- cmd/importer/data.go | 54 +++++++++++++++++++++++++++--------------- cmd/importer/db.go | 46 +++++++++++++++++++++++++++++------ cmd/importer/parser.go | 16 +++++++++---- 4 files changed, 92 insertions(+), 34 deletions(-) diff --git a/cmd/importer/README.md b/cmd/importer/README.md index 56d5e3dc759c7..e43721ffb5802 100644 --- a/cmd/importer/README.md +++ b/cmd/importer/README.md @@ -77,7 +77,7 @@ mysql> select * from t; 10 rows in set (0.00 sec) ``` -Support Type: +Support Type [can only be used in none unique index without incremental rule]: tinyint | smallint | int | bigint | float | double | decimal | char | varchar | date | time | datetime | timestamp. @@ -108,7 +108,7 @@ mysql> select * from t; 10 rows in set (0.00 sec) ``` -Support Type [can only be used in unique index]: +Support Type [can only be used in unique index, or with incremental rule]: tinyint | smallint | int | bigint | float | double | decimal | date | time | datetime | timestamp. @@ -142,7 +142,7 @@ mysql> select * from t; ### incremental ``` -./importer -t "create table t(a date comment '[[incremental=1;repeats=3]]');" -P 4000 -c 1 -n 10 +./importer -t "create table t(a date comment '[[incremental=1;repeats=3;probability=100]]');" -P 4000 -c 1 -n 10 ``` Then the table rows will be like this: @@ -166,9 +166,11 @@ MySQL [test]> select * from t; 10 rows in set (0.002 sec) ``` +`probability` controls the exceptions of `incremental` and `repeats`. + Support Type [can only be used in none unique index]: -tinyint | smallint | int | bigint | float | double | decimal | varchar. +tinyint | smallint | int | bigint | float | double | decimal | date | time | datetime | timestamp. ## License Apache 2.0 license. 
See the [LICENSE](../../LICENSE) file for details. diff --git a/cmd/importer/data.go b/cmd/importer/data.go index 116d7978ecec9..e3927ec13b7de 100644 --- a/cmd/importer/data.go +++ b/cmd/importer/data.go @@ -15,14 +15,13 @@ package main import ( "fmt" + "math/rand" "sync" "time" "github.com/cznic/mathutil" ) -var defaultStep int64 = 1 - type datum struct { sync.Mutex @@ -32,6 +31,7 @@ type datum struct { timeValue time.Time remains uint64 repeats uint64 + probability uint32 step int64 init bool @@ -39,10 +39,10 @@ type datum struct { } func newDatum() *datum { - return &datum{intValue: -1, step: 1, repeats: 1, remains: 1} + return &datum{step: 1, repeats: 1, remains: 1, probability: 100} } -func (d *datum) setInitInt64Value(step int64, min int64, max int64) { +func (d *datum) setInitInt64Value(min int64, max int64) { d.Lock() defer d.Unlock() @@ -50,16 +50,11 @@ func (d *datum) setInitInt64Value(step int64, min int64, max int64) { return } - d.step = step - - if min != -1 { - d.minIntValue = min - d.intValue = min - } - - if min < max { - d.maxIntValue = max - d.useRange = true + d.minIntValue = min + d.maxIntValue = max + d.useRange = true + if d.step < 0 { + d.intValue = (min + max) / 2 } d.init = true @@ -75,8 +70,13 @@ func (d *datum) nextInt64() int64 { } if d.useRange { d.intValue = mathutil.MinInt64(d.intValue, d.maxIntValue) + d.intValue = mathutil.MaxInt64(d.intValue, d.minIntValue) + } + if uint32(rand.Int31n(100))+1 <= 100-d.probability { + d.remains -= uint64(rand.Int63n(int64(d.remains))) + 1 + } else { + d.remains-- } - d.remains-- return d.intValue } @@ -121,7 +121,11 @@ func (d *datum) nextTime() string { d.timeValue = d.timeValue.Add(time.Duration(d.step) * time.Second) d.remains = d.repeats } - d.remains-- + if uint32(rand.Int31n(100))+1 <= 100-d.probability { + d.remains -= uint64(rand.Int63n(int64(d.remains))) + 1 + } else { + d.remains-- + } return fmt.Sprintf("%02d:%02d:%02d", d.timeValue.Hour(), d.timeValue.Minute(), d.timeValue.Second()) } 
@@ -135,7 +139,11 @@ func (d *datum) nextDate() string { d.timeValue = d.timeValue.AddDate(0, 0, int(d.step)) d.remains = d.repeats } - d.remains-- + if uint32(rand.Int31n(100))+1 <= 100-d.probability { + d.remains -= uint64(rand.Int63n(int64(d.remains))) + 1 + } else { + d.remains-- + } return fmt.Sprintf("%04d-%02d-%02d", d.timeValue.Year(), d.timeValue.Month(), d.timeValue.Day()) } @@ -148,7 +156,11 @@ func (d *datum) nextTimestamp() string { } else if d.remains <= 0 { d.timeValue = d.timeValue.Add(time.Duration(d.step) * time.Second) } - d.remains-- + if uint32(rand.Int31n(100))+1 <= 100-d.probability { + d.remains -= uint64(rand.Int63n(int64(d.remains))) + 1 + } else { + d.remains-- + } return fmt.Sprintf("%04d-%02d-%02d %02d:%02d:%02d", d.timeValue.Year(), d.timeValue.Month(), d.timeValue.Day(), d.timeValue.Hour(), d.timeValue.Minute(), d.timeValue.Second()) @@ -164,6 +176,10 @@ func (d *datum) nextYear() string { d.timeValue = d.timeValue.AddDate(int(d.step), 0, 0) d.remains = d.repeats } - d.remains-- + if uint32(rand.Int31n(100))+1 <= 100-d.probability { + d.remains -= uint64(rand.Int63n(int64(d.remains))) + 1 + } else { + d.remains-- + } return fmt.Sprintf("%04d", d.timeValue.Year()) } diff --git a/cmd/importer/db.go b/cmd/importer/db.go index 63a47294399f3..ded6091d031af 100644 --- a/cmd/importer/db.go +++ b/cmd/importer/db.go @@ -17,6 +17,7 @@ import ( "database/sql" "fmt" "math" + "math/rand" "strconv" "strings" @@ -79,7 +80,7 @@ func randInt64Value(column *column, min int64, max int64) int64 { func nextInt64Value(column *column, min int64, max int64) int64 { min, max = intRangeValue(column, min, max) - column.data.setInitInt64Value(column.step, min, max) + column.data.setInitInt64Value(min, max) return column.data.nextInt64() } @@ -136,13 +137,24 @@ func genColumnData(table *table, column *column) (string, error) { if _, ok := table.uniqIndices[column.name]; ok { incremental = true } + if incremental { + incremental = uint32(rand.Int31n(100))+1 <= 
column.data.probability + // If incremental, there is only one worker, so it is safe to directly access datum. + if !incremental && column.data.remains > 0 { + column.data.remains-- + } + } isUnsigned := mysql.HasUnsignedFlag(tp.Flag) switch tp.Tp { case mysql.TypeTiny: var data int64 if incremental { - data = nextInt64Value(column, 0, math.MaxUint8) + if isUnsigned { + data = nextInt64Value(column, 0, math.MaxUint8) + } else { + data = nextInt64Value(column, math.MinInt8, math.MaxInt8) + } } else { if isUnsigned { data = randInt64Value(column, 0, math.MaxUint8) @@ -154,7 +166,11 @@ func genColumnData(table *table, column *column) (string, error) { case mysql.TypeShort: var data int64 if incremental { - data = nextInt64Value(column, 0, math.MaxUint16) + if isUnsigned { + data = nextInt64Value(column, 0, math.MaxUint16) + } else { + data = nextInt64Value(column, math.MinInt16, math.MaxInt16) + } } else { if isUnsigned { data = randInt64Value(column, 0, math.MaxUint16) @@ -166,7 +182,11 @@ func genColumnData(table *table, column *column) (string, error) { case mysql.TypeLong: var data int64 if incremental { - data = nextInt64Value(column, 0, math.MaxUint32) + if isUnsigned { + data = nextInt64Value(column, 0, math.MaxUint32) + } else { + data = nextInt64Value(column, math.MinInt32, math.MaxInt32) + } } else { if isUnsigned { data = randInt64Value(column, 0, math.MaxUint32) @@ -178,7 +198,11 @@ func genColumnData(table *table, column *column) (string, error) { case mysql.TypeLonglong: var data int64 if incremental { - data = nextInt64Value(column, 0, math.MaxInt64) + if isUnsigned { + data = nextInt64Value(column, 0, math.MaxInt64-1) + } else { + data = nextInt64Value(column, math.MinInt32, math.MaxInt32) + } } else { if isUnsigned { data = randInt64Value(column, 0, math.MaxInt64-1) @@ -200,7 +224,11 @@ func genColumnData(table *table, column *column) (string, error) { case mysql.TypeFloat, mysql.TypeDouble: var data float64 if incremental { - data = 
float64(nextInt64Value(column, 0, math.MaxInt64)) + if isUnsigned { + data = float64(nextInt64Value(column, 0, math.MaxInt64-1)) + } else { + data = float64(nextInt64Value(column, math.MinInt32, math.MaxInt32)) + } } else { if isUnsigned { data = float64(randInt64Value(column, 0, math.MaxInt64-1)) @@ -256,7 +284,11 @@ func genColumnData(table *table, column *column) (string, error) { limit = math.MaxInt64 } if incremental { - intVal = nextInt64Value(column, 0, limit-1) + if isUnsigned { + intVal = nextInt64Value(column, 0, limit-1) + } else { + intVal = nextInt64Value(column, (-limit+1)/2, (limit-1)/2) + } } else { if isUnsigned { intVal = randInt64Value(column, 0, limit-1) diff --git a/cmd/importer/parser.go b/cmd/importer/parser.go index b5fc27f4fd7fa..9914c5f1205a0 100644 --- a/cmd/importer/parser.go +++ b/cmd/importer/parser.go @@ -38,7 +38,6 @@ type column struct { min string max string incremental bool - step int64 set []string table *table @@ -52,7 +51,7 @@ func (col *column) String() string { } return fmt.Sprintf("[column]idx: %d, name: %s, tp: %v, min: %s, max: %s, step: %d, set: %v\n", - col.idx, col.name, col.tp, col.min, col.max, col.step, col.set) + col.idx, col.name, col.tp, col.min, col.max, col.data.step, col.set) } func (col *column) parseRule(kvs []string, uniq bool) { @@ -72,7 +71,7 @@ func (col *column) parseRule(kvs []string, uniq bool) { } } else if key == "step" { var err error - col.step, err = strconv.ParseInt(value, 10, 64) + col.data.step, err = strconv.ParseInt(value, 10, 64) if err != nil { log.Fatal(err) } @@ -97,6 +96,15 @@ func (col *column) parseRule(kvs []string, uniq bool) { } col.data.repeats = repeats col.data.remains = repeats + } else if key == "probability" { + prob, err := strconv.ParseUint(value, 10, 32) + if err != nil { + log.Fatal(err) + } + if prob > 100 || prob == 0 { + log.Fatal("probability must be in (0, 100]") + } + col.data.probability = uint32(prob) } } @@ -234,7 +242,7 @@ func parseTable(t *table, stmt 
*ast.CreateTableStmt) error { t.tblInfo = mockTbl for i, col := range stmt.Cols { - column := &column{idx: i + 1, table: t, step: defaultStep, data: newDatum()} + column := &column{idx: i + 1, table: t, data: newDatum()} column.parseColumn(col) } From 0ce05e5aaba01947f525322f28bfdb08564830ea Mon Sep 17 00:00:00 2001 From: Kenan Yao Date: Tue, 14 May 2019 16:44:51 +0800 Subject: [PATCH 2/5] add more comment --- cmd/importer/README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cmd/importer/README.md b/cmd/importer/README.md index e43721ffb5802..1e0f44e1512c7 100644 --- a/cmd/importer/README.md +++ b/cmd/importer/README.md @@ -166,7 +166,8 @@ MySQL [test]> select * from t; 10 rows in set (0.002 sec) ``` -`probability` controls the exceptions of `incremental` and `repeats`. +`probability` controls the exceptions of `incremental` and `repeats`: a higher probability means that rows are +in stricter incremental order, and that the number of rows in each group is closer to the specified `repeats`. Support Type [can only be used in none unique index]: From 369f3f57c2e607b48167fd1953780c105b41c14a Mon Sep 17 00:00:00 2001 From: Kenan Yao Date: Tue, 14 May 2019 16:58:37 +0800 Subject: [PATCH 3/5] address comments --- cmd/importer/db.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cmd/importer/db.go b/cmd/importer/db.go index ded6091d031af..d397da2a770ec 100644 --- a/cmd/importer/db.go +++ b/cmd/importer/db.go @@ -134,9 +134,6 @@ func genRowData(table *table) (string, error) { func genColumnData(table *table, column *column) (string, error) { tp := column.tp incremental := column.incremental - if _, ok := table.uniqIndices[column.name]; ok { - incremental = true - } if incremental { incremental = uint32(rand.Int31n(100))+1 <= column.data.probability // If incremental, there is only one worker, so it is safe to directly access datum.
@@ -144,6 +141,9 @@ func genColumnData(table *table, column *column) (string, error) { column.data.remains-- } } + if _, ok := table.uniqIndices[column.name]; ok { + incremental = true + } isUnsigned := mysql.HasUnsignedFlag(tp.Flag) switch tp.Tp { From 11c5ccb7a2ad84a912606c43a0c7673c917901c4 Mon Sep 17 00:00:00 2001 From: Kenan Yao Date: Tue, 14 May 2019 17:23:29 +0800 Subject: [PATCH 4/5] extract func --- cmd/importer/data.go | 38 +++++++++++++------------------------- 1 file changed, 13 insertions(+), 25 deletions(-) diff --git a/cmd/importer/data.go b/cmd/importer/data.go index e3927ec13b7de..42759b8d2ffc4 100644 --- a/cmd/importer/data.go +++ b/cmd/importer/data.go @@ -60,6 +60,14 @@ func (d *datum) setInitInt64Value(min int64, max int64) { d.init = true } +func (d *datum) updateRemains() { + if uint32(rand.Int31n(100))+1 <= 100-d.probability { + d.remains -= uint64(rand.Int63n(int64(d.remains))) + 1 + } else { + d.remains-- + } +} + func (d *datum) nextInt64() int64 { d.Lock() defer d.Unlock() @@ -72,11 +80,7 @@ func (d *datum) nextInt64() int64 { d.intValue = mathutil.MinInt64(d.intValue, d.maxIntValue) d.intValue = mathutil.MaxInt64(d.intValue, d.minIntValue) } - if uint32(rand.Int31n(100))+1 <= 100-d.probability { - d.remains -= uint64(rand.Int63n(int64(d.remains))) + 1 - } else { - d.remains-- - } + d.updateRemains() return d.intValue } @@ -121,11 +125,7 @@ func (d *datum) nextTime() string { d.timeValue = d.timeValue.Add(time.Duration(d.step) * time.Second) d.remains = d.repeats } - if uint32(rand.Int31n(100))+1 <= 100-d.probability { - d.remains -= uint64(rand.Int63n(int64(d.remains))) + 1 - } else { - d.remains-- - } + d.updateRemains() return fmt.Sprintf("%02d:%02d:%02d", d.timeValue.Hour(), d.timeValue.Minute(), d.timeValue.Second()) } @@ -139,11 +139,7 @@ func (d *datum) nextDate() string { d.timeValue = d.timeValue.AddDate(0, 0, int(d.step)) d.remains = d.repeats } - if uint32(rand.Int31n(100))+1 <= 100-d.probability { - d.remains -= 
uint64(rand.Int63n(int64(d.remains))) + 1 - } else { - d.remains-- - } + d.updateRemains() return fmt.Sprintf("%04d-%02d-%02d", d.timeValue.Year(), d.timeValue.Month(), d.timeValue.Day()) } @@ -156,11 +152,7 @@ func (d *datum) nextTimestamp() string { } else if d.remains <= 0 { d.timeValue = d.timeValue.Add(time.Duration(d.step) * time.Second) } - if uint32(rand.Int31n(100))+1 <= 100-d.probability { - d.remains -= uint64(rand.Int63n(int64(d.remains))) + 1 - } else { - d.remains-- - } + d.updateRemains() return fmt.Sprintf("%04d-%02d-%02d %02d:%02d:%02d", d.timeValue.Year(), d.timeValue.Month(), d.timeValue.Day(), d.timeValue.Hour(), d.timeValue.Minute(), d.timeValue.Second()) @@ -176,10 +168,6 @@ func (d *datum) nextYear() string { d.timeValue = d.timeValue.AddDate(int(d.step), 0, 0) d.remains = d.repeats } - if uint32(rand.Int31n(100))+1 <= 100-d.probability { - d.remains -= uint64(rand.Int63n(int64(d.remains))) + 1 - } else { - d.remains-- - } + d.updateRemains() return fmt.Sprintf("%04d", d.timeValue.Year()) } From b7007d2425af8a34452842f0969b2105b3947e35 Mon Sep 17 00:00:00 2001 From: Kenan Yao Date: Tue, 14 May 2019 21:30:15 +0800 Subject: [PATCH 5/5] fix bug of remains --- cmd/importer/data.go | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/cmd/importer/data.go b/cmd/importer/data.go index 42759b8d2ffc4..2e14db6ccd76d 100644 --- a/cmd/importer/data.go +++ b/cmd/importer/data.go @@ -121,7 +121,8 @@ func (d *datum) nextTime() string { if d.timeValue.IsZero() { d.timeValue = time.Now() - } else if d.remains <= 0 { + } + if d.remains <= 0 { d.timeValue = d.timeValue.Add(time.Duration(d.step) * time.Second) d.remains = d.repeats } @@ -135,7 +136,8 @@ func (d *datum) nextDate() string { if d.timeValue.IsZero() { d.timeValue = time.Now() - } else if d.remains <= 0 { + } + if d.remains <= 0 { d.timeValue = d.timeValue.AddDate(0, 0, int(d.step)) d.remains = d.repeats } @@ -149,8 +151,10 @@ func (d *datum) nextTimestamp() string { if 
d.timeValue.IsZero() { d.timeValue = time.Now() - } else if d.remains <= 0 { + } + if d.remains <= 0 { d.timeValue = d.timeValue.Add(time.Duration(d.step) * time.Second) + d.remains = d.repeats } d.updateRemains() return fmt.Sprintf("%04d-%02d-%02d %02d:%02d:%02d", @@ -164,7 +168,8 @@ func (d *datum) nextYear() string { if d.timeValue.IsZero() { d.timeValue = time.Now() - } else if d.remains <= 0 { + } + if d.remains <= 0 { d.timeValue = d.timeValue.AddDate(int(d.step), 0, 0) d.remains = d.repeats }