Skip to content

Commit

Permalink
cmd/importer: support probability in building incremental column (#10454
Browse files Browse the repository at this point in the history
)
  • Loading branch information
eurekaka authored and zz-jason committed May 15, 2019
1 parent 7deedf8 commit 480c605
Show file tree
Hide file tree
Showing 4 changed files with 90 additions and 38 deletions.
11 changes: 7 additions & 4 deletions cmd/importer/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ mysql> select * from t;
10 rows in set (0.00 sec)
```

Support Type:
Support Type [can only be used in a non-unique index without the incremental rule]:

tinyint | smallint | int | bigint | float | double | decimal | char | varchar | date | time | datetime | timestamp.

Expand Down Expand Up @@ -108,7 +108,7 @@ mysql> select * from t;
10 rows in set (0.00 sec)
```

Support Type [can only be used in unique index]:
Support Type [can only be used in unique index, or with incremental rule]:

tinyint | smallint | int | bigint | float | double | decimal | date | time | datetime | timestamp.

Expand Down Expand Up @@ -142,7 +142,7 @@ mysql> select * from t;
### incremental

```
./importer -t "create table t(a date comment '[[incremental=1;repeats=3]]');" -P 4000 -c 1 -n 10
./importer -t "create table t(a date comment '[[incremental=1;repeats=3;probability=100]]');" -P 4000 -c 1 -n 10
```

Then the table rows will be like this:
Expand All @@ -166,9 +166,12 @@ MySQL [test]> select * from t;
10 rows in set (0.002 sec)
```

`probability` (a percentage in (0, 100]) controls how strictly the `incremental` and `repeats` rules are followed:
a higher probability means that rows are in stricter incremental order and that the number of rows in each group is closer to the specified `repeats`.

Support Type [can only be used in a non-unique index]:

tinyint | smallint | int | bigint | float | double | decimal | varchar.
tinyint | smallint | int | bigint | float | double | decimal | date | time | datetime | timestamp.

## License
Apache 2.0 license. See the [LICENSE](../../LICENSE) file for details.
55 changes: 32 additions & 23 deletions cmd/importer/data.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,14 +15,13 @@ package main

import (
"fmt"
"math/rand"
"sync"
"time"

"github.com/cznic/mathutil"
)

var defaultStep int64 = 1

type datum struct {
sync.Mutex

Expand All @@ -32,39 +31,43 @@ type datum struct {
timeValue time.Time
remains uint64
repeats uint64
probability uint32
step int64

init bool
useRange bool
}

func newDatum() *datum {
return &datum{intValue: -1, step: 1, repeats: 1, remains: 1}
return &datum{step: 1, repeats: 1, remains: 1, probability: 100}
}

func (d *datum) setInitInt64Value(step int64, min int64, max int64) {
func (d *datum) setInitInt64Value(min int64, max int64) {
d.Lock()
defer d.Unlock()

if d.init {
return
}

d.step = step

if min != -1 {
d.minIntValue = min
d.intValue = min
}

if min < max {
d.maxIntValue = max
d.useRange = true
d.minIntValue = min
d.maxIntValue = max
d.useRange = true
if d.step < 0 {
d.intValue = (min + max) / 2
}

d.init = true
}

// updateRemains consumes part of the current group's quota of repeated rows.
//
// With a chance of (100 - probability) percent the group is cut short: a
// random amount in [1, remains] is subtracted, so the group can end before
// the full quota has been emitted. Otherwise exactly one row is consumed.
//
// The method does not lock d itself; nextInt64 calls it with the mutex held.
func (d *datum) updateRemains() {
	// Nothing left to consume: decrementing would wrap the unsigned counter
	// around, and rand.Int63n panics when given a non-positive bound.
	if d.remains == 0 {
		return
	}
	if uint32(rand.Int31n(100))+1 <= 100-d.probability {
		// Exception path: drop a random share of what is left (at least one).
		d.remains -= uint64(rand.Int63n(int64(d.remains))) + 1
		return
	}
	d.remains--
}

func (d *datum) nextInt64() int64 {
d.Lock()
defer d.Unlock()
Expand All @@ -75,8 +78,9 @@ func (d *datum) nextInt64() int64 {
}
if d.useRange {
d.intValue = mathutil.MinInt64(d.intValue, d.maxIntValue)
d.intValue = mathutil.MaxInt64(d.intValue, d.minIntValue)
}
d.remains--
d.updateRemains()
return d.intValue
}

Expand Down Expand Up @@ -117,11 +121,12 @@ func (d *datum) nextTime() string {

if d.timeValue.IsZero() {
d.timeValue = time.Now()
} else if d.remains <= 0 {
}
if d.remains <= 0 {
d.timeValue = d.timeValue.Add(time.Duration(d.step) * time.Second)
d.remains = d.repeats
}
d.remains--
d.updateRemains()
return fmt.Sprintf("%02d:%02d:%02d", d.timeValue.Hour(), d.timeValue.Minute(), d.timeValue.Second())
}

Expand All @@ -131,11 +136,12 @@ func (d *datum) nextDate() string {

if d.timeValue.IsZero() {
d.timeValue = time.Now()
} else if d.remains <= 0 {
}
if d.remains <= 0 {
d.timeValue = d.timeValue.AddDate(0, 0, int(d.step))
d.remains = d.repeats
}
d.remains--
d.updateRemains()
return fmt.Sprintf("%04d-%02d-%02d", d.timeValue.Year(), d.timeValue.Month(), d.timeValue.Day())
}

Expand All @@ -145,10 +151,12 @@ func (d *datum) nextTimestamp() string {

if d.timeValue.IsZero() {
d.timeValue = time.Now()
} else if d.remains <= 0 {
}
if d.remains <= 0 {
d.timeValue = d.timeValue.Add(time.Duration(d.step) * time.Second)
d.remains = d.repeats
}
d.remains--
d.updateRemains()
return fmt.Sprintf("%04d-%02d-%02d %02d:%02d:%02d",
d.timeValue.Year(), d.timeValue.Month(), d.timeValue.Day(),
d.timeValue.Hour(), d.timeValue.Minute(), d.timeValue.Second())
Expand All @@ -160,10 +168,11 @@ func (d *datum) nextYear() string {

if d.timeValue.IsZero() {
d.timeValue = time.Now()
} else if d.remains <= 0 {
}
if d.remains <= 0 {
d.timeValue = d.timeValue.AddDate(int(d.step), 0, 0)
d.remains = d.repeats
}
d.remains--
d.updateRemains()
return fmt.Sprintf("%04d", d.timeValue.Year())
}
46 changes: 39 additions & 7 deletions cmd/importer/db.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ import (
"database/sql"
"fmt"
"math"
"math/rand"
"strconv"
"strings"

Expand Down Expand Up @@ -79,7 +80,7 @@ func randInt64Value(column *column, min int64, max int64) int64 {

func nextInt64Value(column *column, min int64, max int64) int64 {
min, max = intRangeValue(column, min, max)
column.data.setInitInt64Value(column.step, min, max)
column.data.setInitInt64Value(min, max)
return column.data.nextInt64()
}

Expand Down Expand Up @@ -133,6 +134,13 @@ func genRowData(table *table) (string, error) {
func genColumnData(table *table, column *column) (string, error) {
tp := column.tp
incremental := column.incremental
if incremental {
incremental = uint32(rand.Int31n(100))+1 <= column.data.probability
// If incremental, there is only one worker, so it is safe to directly access datum.
if !incremental && column.data.remains > 0 {
column.data.remains--
}
}
if _, ok := table.uniqIndices[column.name]; ok {
incremental = true
}
Expand All @@ -142,7 +150,11 @@ func genColumnData(table *table, column *column) (string, error) {
case mysql.TypeTiny:
var data int64
if incremental {
data = nextInt64Value(column, 0, math.MaxUint8)
if isUnsigned {
data = nextInt64Value(column, 0, math.MaxUint8)
} else {
data = nextInt64Value(column, math.MinInt8, math.MaxInt8)
}
} else {
if isUnsigned {
data = randInt64Value(column, 0, math.MaxUint8)
Expand All @@ -154,7 +166,11 @@ func genColumnData(table *table, column *column) (string, error) {
case mysql.TypeShort:
var data int64
if incremental {
data = nextInt64Value(column, 0, math.MaxUint16)
if isUnsigned {
data = nextInt64Value(column, 0, math.MaxUint16)
} else {
data = nextInt64Value(column, math.MinInt16, math.MaxInt16)
}
} else {
if isUnsigned {
data = randInt64Value(column, 0, math.MaxUint16)
Expand All @@ -166,7 +182,11 @@ func genColumnData(table *table, column *column) (string, error) {
case mysql.TypeLong:
var data int64
if incremental {
data = nextInt64Value(column, 0, math.MaxUint32)
if isUnsigned {
data = nextInt64Value(column, 0, math.MaxUint32)
} else {
data = nextInt64Value(column, math.MinInt32, math.MaxInt32)
}
} else {
if isUnsigned {
data = randInt64Value(column, 0, math.MaxUint32)
Expand All @@ -178,7 +198,11 @@ func genColumnData(table *table, column *column) (string, error) {
case mysql.TypeLonglong:
var data int64
if incremental {
data = nextInt64Value(column, 0, math.MaxInt64)
if isUnsigned {
data = nextInt64Value(column, 0, math.MaxInt64-1)
} else {
data = nextInt64Value(column, math.MinInt32, math.MaxInt32)
}
} else {
if isUnsigned {
data = randInt64Value(column, 0, math.MaxInt64-1)
Expand All @@ -200,7 +224,11 @@ func genColumnData(table *table, column *column) (string, error) {
case mysql.TypeFloat, mysql.TypeDouble:
var data float64
if incremental {
data = float64(nextInt64Value(column, 0, math.MaxInt64))
if isUnsigned {
data = float64(nextInt64Value(column, 0, math.MaxInt64-1))
} else {
data = float64(nextInt64Value(column, math.MinInt32, math.MaxInt32))
}
} else {
if isUnsigned {
data = float64(randInt64Value(column, 0, math.MaxInt64-1))
Expand Down Expand Up @@ -256,7 +284,11 @@ func genColumnData(table *table, column *column) (string, error) {
limit = math.MaxInt64
}
if incremental {
intVal = nextInt64Value(column, 0, limit-1)
if isUnsigned {
intVal = nextInt64Value(column, 0, limit-1)
} else {
intVal = nextInt64Value(column, (-limit+1)/2, (limit-1)/2)
}
} else {
if isUnsigned {
intVal = randInt64Value(column, 0, limit-1)
Expand Down
16 changes: 12 additions & 4 deletions cmd/importer/parser.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,6 @@ type column struct {
min string
max string
incremental bool
step int64
set []string

table *table
Expand All @@ -52,7 +51,7 @@ func (col *column) String() string {
}

return fmt.Sprintf("[column]idx: %d, name: %s, tp: %v, min: %s, max: %s, step: %d, set: %v\n",
col.idx, col.name, col.tp, col.min, col.max, col.step, col.set)
col.idx, col.name, col.tp, col.min, col.max, col.data.step, col.set)
}

func (col *column) parseRule(kvs []string, uniq bool) {
Expand All @@ -72,7 +71,7 @@ func (col *column) parseRule(kvs []string, uniq bool) {
}
} else if key == "step" {
var err error
col.step, err = strconv.ParseInt(value, 10, 64)
col.data.step, err = strconv.ParseInt(value, 10, 64)
if err != nil {
log.Fatal(err)
}
Expand All @@ -97,6 +96,15 @@ func (col *column) parseRule(kvs []string, uniq bool) {
}
col.data.repeats = repeats
col.data.remains = repeats
} else if key == "probability" {
prob, err := strconv.ParseUint(value, 10, 32)
if err != nil {
log.Fatal(err)
}
if prob > 100 || prob == 0 {
log.Fatal("probability must be in (0, 100]")
}
col.data.probability = uint32(prob)
}
}

Expand Down Expand Up @@ -234,7 +242,7 @@ func parseTable(t *table, stmt *ast.CreateTableStmt) error {
t.tblInfo = mockTbl

for i, col := range stmt.Cols {
column := &column{idx: i + 1, table: t, step: defaultStep, data: newDatum()}
column := &column{idx: i + 1, table: t, data: newDatum()}
column.parseColumn(col)
}

Expand Down

0 comments on commit 480c605

Please sign in to comment.