Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

cmd/importer: support probability in building incremental column #10454

Merged
merged 7 commits into from
May 15, 2019
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 6 additions & 4 deletions cmd/importer/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ mysql> select * from t;
10 rows in set (0.00 sec)
```

Support Type:
Supported types [can only be used on a non-unique index without the incremental rule]:

tinyint | smallint | int | bigint | float | double | decimal | char | varchar | date | time | datetime | timestamp.

Expand Down Expand Up @@ -108,7 +108,7 @@ mysql> select * from t;
10 rows in set (0.00 sec)
```

Support Type [can only be used in unique index]:
Supported types [can only be used on a unique index, or with the incremental rule]:

tinyint | smallint | int | bigint | float | double | decimal | date | time | datetime | timestamp.

Expand Down Expand Up @@ -142,7 +142,7 @@ mysql> select * from t;
### incremental

```
./importer -t "create table t(a date comment '[[incremental=1;repeats=3]]');" -P 4000 -c 1 -n 10
./importer -t "create table t(a date comment '[[incremental=1;repeats=3;probability=100]]');" -P 4000 -c 1 -n 10
```

Then the table rows will be like this:
Expand All @@ -166,9 +166,11 @@ MySQL [test]> select * from t;
10 rows in set (0.002 sec)
```

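`probability` is an integer percentage in (0, 100] (default 100) giving the chance that each generated row follows the `incremental` and `repeats` rules; the remaining rows are exceptions to those rules.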
eurekaka marked this conversation as resolved.
Show resolved Hide resolved

Supported types [can only be used on a non-unique index]:

tinyint | smallint | int | bigint | float | double | decimal | varchar.
tinyint | smallint | int | bigint | float | double | decimal | date | time | datetime | timestamp.

## License
Apache 2.0 license. See the [LICENSE](../../LICENSE) file for details.
54 changes: 35 additions & 19 deletions cmd/importer/data.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,14 +15,13 @@ package main

import (
"fmt"
"math/rand"
"sync"
"time"

"github.com/cznic/mathutil"
)

var defaultStep int64 = 1

type datum struct {
sync.Mutex

Expand All @@ -32,34 +31,30 @@ type datum struct {
timeValue time.Time
remains uint64
repeats uint64
probability uint32
step int64

init bool
useRange bool
}

func newDatum() *datum {
return &datum{intValue: -1, step: 1, repeats: 1, remains: 1}
return &datum{step: 1, repeats: 1, remains: 1, probability: 100}
}

func (d *datum) setInitInt64Value(step int64, min int64, max int64) {
func (d *datum) setInitInt64Value(min int64, max int64) {
d.Lock()
defer d.Unlock()

if d.init {
return
}

d.step = step

if min != -1 {
d.minIntValue = min
d.intValue = min
}

if min < max {
d.maxIntValue = max
d.useRange = true
d.minIntValue = min
d.maxIntValue = max
d.useRange = true
if d.step < 0 {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should we handle a step of 0 specially?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If step is 0, we can use range or set rule, instead of incremental?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@lamxTyler What do you think?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

A step of 0 also makes sense, for example when we only want constant data.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, but if we want constant data, we don't need to use incremental, a better way is using set?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I prefer to do this optimization in the future; for now, let's make it work.

d.intValue = (min + max) / 2
}

d.init = true
Expand All @@ -75,8 +70,13 @@ func (d *datum) nextInt64() int64 {
}
if d.useRange {
d.intValue = mathutil.MinInt64(d.intValue, d.maxIntValue)
d.intValue = mathutil.MaxInt64(d.intValue, d.minIntValue)
}
if uint32(rand.Int31n(100))+1 <= 100-d.probability {
eurekaka marked this conversation as resolved.
Show resolved Hide resolved
d.remains -= uint64(rand.Int63n(int64(d.remains))) + 1
} else {
d.remains--
}
d.remains--
return d.intValue
}

Expand Down Expand Up @@ -121,7 +121,11 @@ func (d *datum) nextTime() string {
d.timeValue = d.timeValue.Add(time.Duration(d.step) * time.Second)
d.remains = d.repeats
}
d.remains--
if uint32(rand.Int31n(100))+1 <= 100-d.probability {
d.remains -= uint64(rand.Int63n(int64(d.remains))) + 1
} else {
d.remains--
}
return fmt.Sprintf("%02d:%02d:%02d", d.timeValue.Hour(), d.timeValue.Minute(), d.timeValue.Second())
}

Expand All @@ -135,7 +139,11 @@ func (d *datum) nextDate() string {
d.timeValue = d.timeValue.AddDate(0, 0, int(d.step))
d.remains = d.repeats
}
d.remains--
if uint32(rand.Int31n(100))+1 <= 100-d.probability {
d.remains -= uint64(rand.Int63n(int64(d.remains))) + 1
} else {
d.remains--
}
return fmt.Sprintf("%04d-%02d-%02d", d.timeValue.Year(), d.timeValue.Month(), d.timeValue.Day())
}

Expand All @@ -148,7 +156,11 @@ func (d *datum) nextTimestamp() string {
} else if d.remains <= 0 {
d.timeValue = d.timeValue.Add(time.Duration(d.step) * time.Second)
}
d.remains--
if uint32(rand.Int31n(100))+1 <= 100-d.probability {
d.remains -= uint64(rand.Int63n(int64(d.remains))) + 1
} else {
d.remains--
}
return fmt.Sprintf("%04d-%02d-%02d %02d:%02d:%02d",
d.timeValue.Year(), d.timeValue.Month(), d.timeValue.Day(),
d.timeValue.Hour(), d.timeValue.Minute(), d.timeValue.Second())
Expand All @@ -164,6 +176,10 @@ func (d *datum) nextYear() string {
d.timeValue = d.timeValue.AddDate(int(d.step), 0, 0)
d.remains = d.repeats
}
d.remains--
if uint32(rand.Int31n(100))+1 <= 100-d.probability {
d.remains -= uint64(rand.Int63n(int64(d.remains))) + 1
} else {
d.remains--
}
return fmt.Sprintf("%04d", d.timeValue.Year())
}
46 changes: 39 additions & 7 deletions cmd/importer/db.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ import (
"database/sql"
"fmt"
"math"
"math/rand"
"strconv"
"strings"

Expand Down Expand Up @@ -79,7 +80,7 @@ func randInt64Value(column *column, min int64, max int64) int64 {

func nextInt64Value(column *column, min int64, max int64) int64 {
min, max = intRangeValue(column, min, max)
column.data.setInitInt64Value(column.step, min, max)
column.data.setInitInt64Value(min, max)
return column.data.nextInt64()
}

Expand Down Expand Up @@ -136,13 +137,24 @@ func genColumnData(table *table, column *column) (string, error) {
if _, ok := table.uniqIndices[column.name]; ok {
incremental = true
}
if incremental {
incremental = uint32(rand.Int31n(100))+1 <= column.data.probability
// If incremental, there is only one worker, so it is safe to directly access datum.
zz-jason marked this conversation as resolved.
Show resolved Hide resolved
if !incremental && column.data.remains > 0 {
column.data.remains--
}
}
isUnsigned := mysql.HasUnsignedFlag(tp.Flag)

switch tp.Tp {
case mysql.TypeTiny:
var data int64
if incremental {
data = nextInt64Value(column, 0, math.MaxUint8)
if isUnsigned {
data = nextInt64Value(column, 0, math.MaxUint8)
} else {
data = nextInt64Value(column, math.MinInt8, math.MaxInt8)
}
} else {
if isUnsigned {
data = randInt64Value(column, 0, math.MaxUint8)
Expand All @@ -154,7 +166,11 @@ func genColumnData(table *table, column *column) (string, error) {
case mysql.TypeShort:
var data int64
if incremental {
data = nextInt64Value(column, 0, math.MaxUint16)
if isUnsigned {
data = nextInt64Value(column, 0, math.MaxUint16)
} else {
data = nextInt64Value(column, math.MinInt16, math.MaxInt16)
}
} else {
if isUnsigned {
data = randInt64Value(column, 0, math.MaxUint16)
Expand All @@ -166,7 +182,11 @@ func genColumnData(table *table, column *column) (string, error) {
case mysql.TypeLong:
var data int64
if incremental {
data = nextInt64Value(column, 0, math.MaxUint32)
if isUnsigned {
data = nextInt64Value(column, 0, math.MaxUint32)
} else {
data = nextInt64Value(column, math.MinInt32, math.MaxInt32)
}
} else {
if isUnsigned {
data = randInt64Value(column, 0, math.MaxUint32)
Expand All @@ -178,7 +198,11 @@ func genColumnData(table *table, column *column) (string, error) {
case mysql.TypeLonglong:
var data int64
if incremental {
data = nextInt64Value(column, 0, math.MaxInt64)
if isUnsigned {
data = nextInt64Value(column, 0, math.MaxInt64-1)
} else {
data = nextInt64Value(column, math.MinInt32, math.MaxInt32)
}
} else {
if isUnsigned {
data = randInt64Value(column, 0, math.MaxInt64-1)
Expand All @@ -200,7 +224,11 @@ func genColumnData(table *table, column *column) (string, error) {
case mysql.TypeFloat, mysql.TypeDouble:
var data float64
if incremental {
data = float64(nextInt64Value(column, 0, math.MaxInt64))
if isUnsigned {
data = float64(nextInt64Value(column, 0, math.MaxInt64-1))
} else {
data = float64(nextInt64Value(column, math.MinInt32, math.MaxInt32))
}
} else {
if isUnsigned {
data = float64(randInt64Value(column, 0, math.MaxInt64-1))
Expand Down Expand Up @@ -256,7 +284,11 @@ func genColumnData(table *table, column *column) (string, error) {
limit = math.MaxInt64
}
if incremental {
intVal = nextInt64Value(column, 0, limit-1)
if isUnsigned {
intVal = nextInt64Value(column, 0, limit-1)
} else {
intVal = nextInt64Value(column, (-limit+1)/2, (limit-1)/2)
}
} else {
if isUnsigned {
intVal = randInt64Value(column, 0, limit-1)
Expand Down
16 changes: 12 additions & 4 deletions cmd/importer/parser.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,6 @@ type column struct {
min string
max string
incremental bool
step int64
set []string

table *table
Expand All @@ -52,7 +51,7 @@ func (col *column) String() string {
}

return fmt.Sprintf("[column]idx: %d, name: %s, tp: %v, min: %s, max: %s, step: %d, set: %v\n",
col.idx, col.name, col.tp, col.min, col.max, col.step, col.set)
col.idx, col.name, col.tp, col.min, col.max, col.data.step, col.set)
}

func (col *column) parseRule(kvs []string, uniq bool) {
Expand All @@ -72,7 +71,7 @@ func (col *column) parseRule(kvs []string, uniq bool) {
}
} else if key == "step" {
var err error
col.step, err = strconv.ParseInt(value, 10, 64)
col.data.step, err = strconv.ParseInt(value, 10, 64)
if err != nil {
log.Fatal(err)
}
Expand All @@ -97,6 +96,15 @@ func (col *column) parseRule(kvs []string, uniq bool) {
}
col.data.repeats = repeats
col.data.remains = repeats
} else if key == "probability" {
prob, err := strconv.ParseUint(value, 10, 32)
if err != nil {
log.Fatal(err)
}
if prob > 100 || prob == 0 {
log.Fatal("probability must be in (0, 100]")
}
col.data.probability = uint32(prob)
}
}

Expand Down Expand Up @@ -234,7 +242,7 @@ func parseTable(t *table, stmt *ast.CreateTableStmt) error {
t.tblInfo = mockTbl

for i, col := range stmt.Cols {
column := &column{idx: i + 1, table: t, step: defaultStep, data: newDatum()}
column := &column{idx: i + 1, table: t, data: newDatum()}
column.parseColumn(col)
}

Expand Down