diff --git a/CHANGELOG.md b/CHANGELOG.md index 0cdafd2c..8a65a98e 100755 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,6 +14,10 @@ Types of changes - `Fixed` for any bug fixes. - `Security` in case of vulnerabilities. +## [1.27.0] + +- `Added` parameter `maxstrlen` to `sha3` and `hashInCSV` masks + ## [1.26.1] - `Fixed` performance issues on JSON serialization diff --git a/README.md b/README.md index a396d9af..a68687ec 100755 --- a/README.md +++ b/README.md @@ -813,6 +813,8 @@ masking: Here the length parameter is not given, but with the `resistance` parameter set to 10M, the mask will calculate the minimum length required (6 bytes in this example because 2^(6*8/2) > 10M). +It can be difficult to anticipate what the maximum identifier string length (in characters) will be, because it depends on the `domain` and on the value of the `length` parameter (which can be invisible in the masking configuration because it is deduced from the `resistance` parameter). Therefore, an optional parameter named `maxstrlen` was created; its only purpose is to raise an error if the maximum length (in characters) of the identifiers that can be produced is greater than a threshold. 
+ [Return to list of masks](#possible-masks) ### Range diff --git a/pkg/hashcsv/hashcsv.go b/pkg/hashcsv/hashcsv.go index 78f3eec9..70e4b7a8 100644 --- a/pkg/hashcsv/hashcsv.go +++ b/pkg/hashcsv/hashcsv.go @@ -51,6 +51,9 @@ type MaskEngine struct { // NewMask create a MaskRandomChoiceInCSV with a seed func NewMask(conf model.ChoiceInCSVType, seed int64, seeder model.Seeder) (MaskEngine, error) { template, err := template.New("template-randomInCsv").Parse(conf.URI) + if err != nil { + return MaskEngine{}, err + } sep := ',' if len(conf.Separator) > 0 { sep, _ = utf8.DecodeRune([]byte(conf.Separator)) @@ -59,6 +62,7 @@ func NewMask(conf model.ChoiceInCSVType, seed int64, seeder model.Seeder) (MaskE if len(conf.Comment) > 0 { comment, _ = utf8.DecodeRune([]byte(conf.Comment)) } + genIdentifier, err := sha3.NewMask(conf.Identifier.Length, conf.Identifier.Resistance, conf.Identifier.Domain, conf.Identifier.MaxStrLen, seed, seeder) // nolint: gosec return MaskEngine{ rand: rand.New(rand.NewSource(seed)), @@ -71,7 +75,7 @@ func NewMask(conf model.ChoiceInCSVType, seed int64, seeder model.Seeder) (MaskE fieldsPerRecord: conf.FieldsPerRecord, trimSpaces: conf.TrimSpace, identifierField: conf.Identifier.Field, - identifierGen: sha3.NewMask(conf.Identifier.Length, conf.Identifier.Resistance, conf.Identifier.Domain, seed, seeder), + identifierGen: genIdentifier, }, err } diff --git a/pkg/model/model.go b/pkg/model/model.go index 70309e16..8ffbdcfd 100755 --- a/pkg/model/model.go +++ b/pkg/model/model.go @@ -234,6 +234,7 @@ type Sha3Type struct { Length int `yaml:"length,omitempty" json:"length,omitempty" jsonschema:"oneof_required=Length,title=Length,description=Length of the produced output in bytes"` Resistance int `yaml:"resistance,omitempty" json:"resistance,omitempty" jsonschema:"oneof_required=Resistance,title=Resistance,description=Collision resistance of the produced hash"` Domain string `yaml:"domain,omitempty" json:"domain,omitempty" jsonschema_description:"allowed 
characters domain in the output, default to hexadecimal (0123456789abcdef)"` + MaxStrLen int `yaml:"maxstrlen,omitempty" json:"maxstrlen,omitempty" jsonschema_description:"an error will occur if the identifier can grow longer than the specified length"` } type MaskType struct { diff --git a/pkg/sha3/sha3.go b/pkg/sha3/sha3.go index f3a8a6a8..b0fde8cb 100644 --- a/pkg/sha3/sha3.go +++ b/pkg/sha3/sha3.go @@ -35,13 +35,17 @@ type MaskEngine struct { seeder model.Seeder } -func NewMask(length int, resistance int, domain string, seed int64, seeder model.Seeder) MaskEngine { +func NewMask(length int, resistance int, domain string, maxstrlen int, seed int64, seeder model.Seeder) (MaskEngine, error) { if len(domain) < 2 { domain = "0123456789abcdef" } if resistance > 0 { length = lengthWithResistance(resistance) } + var err error + if maxstrlen > 0 { + err = checkMaximumStringLen(maxstrlen, length, domain) + } salt := make([]byte, 0, 16) salt = binary.LittleEndian.AppendUint64(salt, uint64(seed)) return MaskEngine{ @@ -49,7 +53,7 @@ func NewMask(length int, resistance int, domain string, seed int64, seeder model domain: domain, salt: salt, seeder: seeder, - } + }, err } func (me MaskEngine) Mask(e model.Entry, context ...model.Dictionary) (model.Entry, error) { @@ -91,8 +95,9 @@ func (me MaskEngine) Mask(e model.Entry, context ...model.Dictionary) (model.Ent func Factory(conf model.MaskFactoryConfiguration) (model.MaskEngine, bool, error) { if conf.Masking.Mask.Sha3.Length > 0 || conf.Masking.Mask.Sha3.Resistance > 0 { seeder := model.NewSeeder(conf.Masking.Seed.Field, conf.Seed) + mask, err := NewMask(conf.Masking.Mask.Sha3.Length, conf.Masking.Mask.Sha3.Resistance, conf.Masking.Mask.Sha3.Domain, conf.Masking.Mask.Sha3.MaxStrLen, conf.Seed, seeder) - return NewMask(conf.Masking.Mask.Sha3.Length, conf.Masking.Mask.Sha3.Resistance, conf.Masking.Mask.Sha3.Domain, conf.Seed, seeder), true, nil + return mask, true, err } return nil, false, nil } @@ -114,3 +119,16 @@ func 
lengthWithResistance(resistance int) int { return int(math.Ceil(float64(power) * BASE2 / BASE8)) } + +func checkMaximumStringLen(maxstrlen, length int, domain string) error { + maxVal := int(math.Pow(BASE2, float64(length*BASE8))) - 1 + result, err := baseconv.Convert(fmt.Sprintf("%d", maxVal), "0123456789", domain) + if err != nil { + return err + } + log.Info().Int("maxstrlen", maxstrlen).Msgf("Identifiers will be up to %d characters long", len(result)) + if len(result) > maxstrlen { + return fmt.Errorf("identifiers will exceed the maximum authorized length of %d characters (longest identifiers will be %d characters long)", maxstrlen, len(result)) + } + return nil +} diff --git a/schema/v1/pimo.schema.json b/schema/v1/pimo.schema.json index 2be0a911..1cfe683b 100644 --- a/schema/v1/pimo.schema.json +++ b/schema/v1/pimo.schema.json @@ -155,6 +155,10 @@ "type": "string", "description": "allowed characters domain in the output, default to hexadecimal (0123456789abcdef)" }, + "maxstrlen": { + "type": "integer", + "description": "an error will occur if the identifier can grow longer than the specified length" + }, "field": { "type": "string", "description": "Name of the identifier" @@ -1022,6 +1026,10 @@ "domain": { "type": "string", "description": "allowed characters domain in the output, default to hexadecimal (0123456789abcdef)" + }, + "maxstrlen": { + "type": "integer", + "description": "an error will occur if the identifier can grow longer than the specified length" } }, "additionalProperties": false, diff --git a/test/suites/masking_sha3.yml b/test/suites/masking_sha3.yml index 87f63191..22c57516 100644 --- a/test/suites/masking_sha3.yml +++ b/test/suites/masking_sha3.yml @@ -43,3 +43,40 @@ testcases: - result.systemoutjson.email ShouldEqual 458539540885 - result.systemerr ShouldBeEmpty +- name: control maximum identifier length + steps: + - script: rm -f masking.yml + - script: |- + cat > masking.yml < masking.yml <