Skip to content

Commit

Permalink
feat(hashincsv): add param to help configure identifier (#327)
Browse files Browse the repository at this point in the history
* feat: add parameter maxstrlen

* feat: control maxstrlen

* test: add parameter maxstrlen

* style: lint

* test: fix venom test

* feat: add maxstrlen in json schema
  • Loading branch information
adrienaury authored Sep 16, 2024
1 parent 2b8a930 commit 8e59c7c
Show file tree
Hide file tree
Showing 7 changed files with 78 additions and 4 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,10 @@ Types of changes
- `Fixed` for any bug fixes.
- `Security` in case of vulnerabilities.

## [1.27.0]

- `Added` parameter `maxstrlen` to `sha3` and `hashInCSV` masks

## [1.26.1]

- `Fixed` performance issues on JSON serialization
Expand Down
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -813,6 +813,8 @@ masking:

Here the length parameter is not given, but with the `resistance` parameter set to 10M, the mask will calculate the minimum length required (6 bytes in this example because 2^(6*8/2) > 10M).

It can be difficult to anticipate the maximum identifier string length (in characters) because it depends on the `domain` and on the value of the `length` parameter (which may not appear in the masking configuration when it is deduced from the `resistance` parameter). Therefore, an optional parameter named `maxstrlen` is available; its only purpose is to raise an error if the maximum length (in characters) of the identifiers that can be produced is greater than the given threshold.

[Return to list of masks](#possible-masks)

### Range
Expand Down
6 changes: 5 additions & 1 deletion pkg/hashcsv/hashcsv.go
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,9 @@ type MaskEngine struct {
// NewMask create a MaskRandomChoiceInCSV with a seed
func NewMask(conf model.ChoiceInCSVType, seed int64, seeder model.Seeder) (MaskEngine, error) {
template, err := template.New("template-randomInCsv").Parse(conf.URI)
if err != nil {
return MaskEngine{}, err
}
sep := ','
if len(conf.Separator) > 0 {
sep, _ = utf8.DecodeRune([]byte(conf.Separator))
Expand All @@ -59,6 +62,7 @@ func NewMask(conf model.ChoiceInCSVType, seed int64, seeder model.Seeder) (MaskE
if len(conf.Comment) > 0 {
comment, _ = utf8.DecodeRune([]byte(conf.Comment))
}
genIdentifier, err := sha3.NewMask(conf.Identifier.Length, conf.Identifier.Resistance, conf.Identifier.Domain, conf.Identifier.MaxStrLen, seed, seeder)
// nolint: gosec
return MaskEngine{
rand: rand.New(rand.NewSource(seed)),
Expand All @@ -71,7 +75,7 @@ func NewMask(conf model.ChoiceInCSVType, seed int64, seeder model.Seeder) (MaskE
fieldsPerRecord: conf.FieldsPerRecord,
trimSpaces: conf.TrimSpace,
identifierField: conf.Identifier.Field,
identifierGen: sha3.NewMask(conf.Identifier.Length, conf.Identifier.Resistance, conf.Identifier.Domain, seed, seeder),
identifierGen: genIdentifier,
}, err
}

Expand Down
1 change: 1 addition & 0 deletions pkg/model/model.go
Original file line number Diff line number Diff line change
Expand Up @@ -234,6 +234,7 @@ type Sha3Type struct {
Length int `yaml:"length,omitempty" json:"length,omitempty" jsonschema:"oneof_required=Length,title=Length,description=Length of the produced output in bytes"`
Resistance int `yaml:"resistance,omitempty" json:"resistance,omitempty" jsonschema:"oneof_required=Resistance,title=Resistance,description=Collision resistance of the produced hash"`
Domain string `yaml:"domain,omitempty" json:"domain,omitempty" jsonschema_description:"allowed characters domain in the output, default to hexadecimal (0123456789abcdef)"`
MaxStrLen int `yaml:"maxstrlen,omitempty" json:"maxstrlen,omitempty" jsonschema_description:"an error will occur if the identifier can grow longer than the specified length"`
}

type MaskType struct {
Expand Down
24 changes: 21 additions & 3 deletions pkg/sha3/sha3.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,21 +35,25 @@ type MaskEngine struct {
seeder model.Seeder
}

func NewMask(length int, resistance int, domain string, seed int64, seeder model.Seeder) MaskEngine {
func NewMask(length int, resistance int, domain string, maxstrlen int, seed int64, seeder model.Seeder) (MaskEngine, error) {
if len(domain) < 2 {
domain = "0123456789abcdef"
}
if resistance > 0 {
length = lengthWithResistance(resistance)
}
var err error
if maxstrlen > 0 {
err = checkMaximumStringLen(maxstrlen, length, domain)
}
salt := make([]byte, 0, 16)
salt = binary.LittleEndian.AppendUint64(salt, uint64(seed))
return MaskEngine{
length: length,
domain: domain,
salt: salt,
seeder: seeder,
}
}, err
}

func (me MaskEngine) Mask(e model.Entry, context ...model.Dictionary) (model.Entry, error) {
Expand Down Expand Up @@ -91,8 +95,9 @@ func (me MaskEngine) Mask(e model.Entry, context ...model.Dictionary) (model.Ent
func Factory(conf model.MaskFactoryConfiguration) (model.MaskEngine, bool, error) {
if conf.Masking.Mask.Sha3.Length > 0 || conf.Masking.Mask.Sha3.Resistance > 0 {
seeder := model.NewSeeder(conf.Masking.Seed.Field, conf.Seed)
mask, err := NewMask(conf.Masking.Mask.Sha3.Length, conf.Masking.Mask.Sha3.Resistance, conf.Masking.Mask.Sha3.Domain, conf.Masking.Mask.Sha3.MaxStrLen, conf.Seed, seeder)

return NewMask(conf.Masking.Mask.Sha3.Length, conf.Masking.Mask.Sha3.Resistance, conf.Masking.Mask.Sha3.Domain, conf.Seed, seeder), true, nil
return mask, true, err
}
return nil, false, nil
}
Expand All @@ -114,3 +119,16 @@ func lengthWithResistance(resistance int) int {

return int(math.Ceil(float64(power) * BASE2 / BASE8))
}

func checkMaximumStringLen(maxstrlen, length int, domain string) error {
maxVal := int(math.Pow(BASE2, float64(length*BASE8))) - 1
result, err := baseconv.Convert(fmt.Sprintf("%d", maxVal), "0123456789", domain)
if err != nil {
return err
}
log.Info().Int("maxstrlen", maxstrlen).Msgf("Identifiers will be up to %d characters long", len(result))
if len(result) > maxstrlen {
return fmt.Errorf("identifiers will exceed the maximum authorized length of %d characters (longest identifiers will be %d characters long)", maxstrlen, len(result))
}
return nil
}
8 changes: 8 additions & 0 deletions schema/v1/pimo.schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,10 @@
"type": "string",
"description": "allowed characters domain in the output, default to hexadecimal (0123456789abcdef)"
},
"maxstrlen": {
"type": "integer",
"description": "an error will occur if the identifier can grow longer than the specified length"
},
"field": {
"type": "string",
"description": "Name of the identifier"
Expand Down Expand Up @@ -1022,6 +1026,10 @@
"domain": {
"type": "string",
"description": "allowed characters domain in the output, default to hexadecimal (0123456789abcdef)"
},
"maxstrlen": {
"type": "integer",
"description": "an error will occur if the identifier can grow longer than the specified length"
}
},
"additionalProperties": false,
Expand Down
37 changes: 37 additions & 0 deletions test/suites/masking_sha3.yml
Original file line number Diff line number Diff line change
Expand Up @@ -43,3 +43,40 @@ testcases:
- result.systemoutjson.email ShouldEqual 458539540885
- result.systemerr ShouldBeEmpty
- name: control maximum identifier length
steps:
- script: rm -f masking.yml
- script: |-
cat > masking.yml <<EOF
version: "1"
masking:
- selector:
jsonpath: "email"
mask:
sha3:
resistance: 1000000 # up to 1M values with collision resistance
domain: "0123456789" # convert to base 10 with digits 0-9
maxstrlen: 10 # identifiers should be at most 10 characters long
EOF
- script: |-
echo '{"email": "name.surname@domain.com"}' | pimo -s 123
assertions:
- result.code ShouldEqual 1
- result.systemerr ShouldContainSubstring "identifiers will exceed the maximum authorized length of 10 characters (longest identifiers will be 13 characters long)"
- script: rm -f masking.yml
- script: |-
cat > masking.yml <<EOF
version: "1"
masking:
- selector:
jsonpath: "email"
mask:
sha3:
resistance: 1000000 # up to 1M values with collision resistance
domain: "0123456789" # convert to base 10 with digits 0-9
maxstrlen: 13 # identifiers should be at most 13 characters long
EOF
- script: |-
echo '{"email": "name.surname@domain.com"}' | pimo -s 123
assertions:
- result.code ShouldEqual 0

0 comments on commit 8e59c7c

Please sign in to comment.