diff --git a/CHANGELOG.md b/CHANGELOG.md
index f5252fc7..8e5b8e51 100755
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -14,6 +14,10 @@ Types of changes
 - `Fixed` for any bug fixes.
 - `Security` in case of vulnerabilities.
 
+## [1.20.0]
+
+- `Added` new feature for parsing and masking XML files using the following command: `cat XMLfile | pimo xml --subscriber parentTagName=MaskName.yml > outputXMLfile`. This feature supports all level 1 elements that are not arrays.
+
 ## [1.19.0]
 
 - `Added` new features for ff1 mask : `domain`, `preserve` and `onError`.
diff --git a/README.md b/README.md
index ae7ce5d7..11213beb 100755
--- a/README.md
+++ b/README.md
@@ -960,6 +960,128 @@ By default, if not specified otherwise, these classes will be used (input -> out
 
 [Return to list of masks](#possible-masks)
 
+
+### Parsing XML files
+
+To mask data in an XML file with PIMO, run the following command:
+
+```bash
+cat data.xml | pimo xml --subscriber parentTagName=MaskName.yml > maskedData.xml
+```
+
+PIMO selects specific tags within the given parent tag, replaces their text, and writes the entire document to a new XML file. These specific tags should not contain any other nested tags.
+
+To mask attribute values, write the `jsonpath` selector in the masking file according to the following rules:
+
+* For attributes of the parent tag, use `@attributeName` in jsonpath.
+* For attributes of a child tag, use `childTagName@attributeName` in jsonpath.
+
+For example, consider an XML file named data.xml:
+
+**`data.xml`**
+
+```xml
+
+
+
+ NewYork Agency
+ 0032
+
+
+ Doe
+ 12345
+ 50000
+
+
+ Smith
+ 67890
+ 60000
+
+
+```
+
+In this example, you can mask the values of `agency_number` in the `agency` tag and the values of `name` and `account_number` in the `account` tag using the following command:
+
+```bash
+cat data.xml | pimo xml --subscriber agency=masking_agency.yml --subscriber account=masking_account.yml > maskedData.xml
+```
+
+**`masking_agency.yml`**
+
+```yaml
+version: "1"
+seed: 42
+
+masking:
+  - selector:
+      jsonpath: "agency_number" # name of the tag to mask
+    mask:
+      template: '{{MaskRegex "[0-9]{4}$"}}'
+```
+
+**`masking_account.yml`**
+
+```yaml
+version: "1"
+seed: 42
+
+masking:
+  - selector:
+      jsonpath: "name" # name of the tag to mask
+    mask:
+      randomChoiceInUri: "pimo://nameFR"
+  - selector:
+      jsonpath: "@type" # name of the parent tag's attribute to mask
+    mask:
+      randomChoice:
+        - "classic"
+        - "saving"
+        - "securitie"
+  - selector:
+      jsonpath: "account_number" # name of the tag to mask
+    masks:
+      - incremental:
+          start: 1
+          increment: 1
+        # incremental turns the string into an int; the template below restores a string value for the XML file
+      - template: "{{.account_number}}"
+  - selector:
+      jsonpath: "name@age" # child tag's attribute to mask, written childTagName@attributeName
+    masks:
+      - randomInt:
+          min: 18
+          max: 95
+        # Go templates do not accept @ in a field name, so use index in the template to turn the int back into a string
+      - template: "{{index . \"name@age\"}}"
+```
+
+After executing the command with the correct configuration, here is the expected result in the file maskedData.xml:
+
+**`maskedData.xml`**
+
+```xml
+
+
+
+ NewYork Agency
+ 2308
+
+
+ Rolande
+ 1
+ 50000
+
+
+ Matéo
+ 2
+ 60000
+
+
+```
+
+[Return to list of masks](#possible-masks)
+
+
 ## `pimo://` scheme
 
 Pimo embed a usefule list of fake data.
URIs that begin with a pimo:// sheme point to the pseudo files bellow.
diff --git a/cmd/pimo/main.go b/cmd/pimo/main.go
index 75385fdb..2fab0ab1 100644
--- a/cmd/pimo/main.go
+++ b/cmd/pimo/main.go
@@ -67,6 +67,7 @@ var (
 	statsTemplate       string
 	statsDestinationEnv = os.Getenv("PIMO_STATS_URL")
 	statsTemplateEnv    = os.Getenv("PIMO_STATS_TEMPLATE")
+	xmlSubscriberName   map[string]string
 )
 
 func main() {
@@ -119,6 +120,68 @@ There is NO WARRANTY, to the extent permitted by law.`, version, commit, buildDa
 			fmt.Println(jsonschema)
 		},
 	})
+	// Register the xml command: it streams an XML document from the command
+	// input to its output and masks every subscribed element on the way.
+	xmlCmd := &cobra.Command{
+		Use:   "xml",
+		Short: "Parsing and masking XML file",
+		Run: func(cmd *cobra.Command, args []string) {
+			initLog()
+			if len(catchErrors) > 0 {
+				skipLineOnError = true
+				skipLogFile = catchErrors
+			}
+			config := pimo.Config{
+				EmptyInput:       emptyInput,
+				RepeatUntil:      repeatUntil,
+				RepeatWhile:      repeatWhile,
+				Iteration:        iteration,
+				SkipLineOnError:  skipLineOnError,
+				SkipFieldOnError: skipFieldOnError,
+				SkipLogFile:      skipLogFile,
+				CachesToDump:     cachesToDump,
+				CachesToLoad:     cachesToLoad,
+				XMLCallback:      true,
+			}
+
+			parser := pimo.ParseXML(cmd.InOrStdin(), cmd.OutOrStdout())
+			// Each --subscriber entry maps an element name to the pipeline
+			// definition file used to mask that element.
+			for elementName, mask := range xmlSubscriberName {
+				pdef, err := model.LoadPipelineDefinitionFromFile(mask)
+				if err != nil {
+					// The command output carries the masked XML, so report
+					// through the logger and fail instead of printing and exiting 0.
+					log.Err(err).Str("element", elementName).Msg("Cannot load pipeline definition")
+					log.Warn().Int("return", 1).Msg("End PIMO")
+					os.Exit(1)
+				}
+
+				if cmd.Flags().Changed("seed") {
+					pdef.SetSeed(seedValue)
+				}
+
+				ctx := pimo.NewContext(pdef)
+				if err := ctx.Configure(config); err != nil {
+					log.Err(err).Msg("Cannot configure pipeline")
+					log.Warn().Int("return", 1).Msg("End PIMO")
+					os.Exit(1)
+				}
+
+				// ctx is declared inside the loop body, so each registered
+				// callback is bound to the context of its own element.
+				parser.RegisterMapCallback(elementName, ctx.ExecuteMap)
+			}
+			if err := parser.Stream(); err != nil {
+				log.Err(err).Msg("Error during parsing XML document")
+				log.Warn().Int("return", 1).Msg("End PIMO")
+				os.Exit(1)
+			}
+		},
+	}
+	xmlCmd.Flags().StringToStringVar(&xmlSubscriberName, "subscriber", map[string]string{}, "name of element to mask")
+	xmlCmd.Flags().Int64VarP(&seedValue, "seed", "s", 0, "set seed")
+	rootCmd.AddCommand(xmlCmd)
 
 	rootCmd.AddCommand(&cobra.Command{
 		Use:   "flow",
diff --git a/go.mod b/go.mod
index 69959ef8..64d81d36 100644
--- a/go.mod
+++ b/go.mod
@@ -3,6 +3,7 @@ module github.com/cgi-fr/pimo
 go 1.20
 
 require (
+	github.com/CGI-FR/xixo v0.1.7
 	github.com/Masterminds/sprig/v3 v3.2.3
 	github.com/adrienaury/zeromdc v0.0.0-20221116212822-6a366c26ee61
 	github.com/capitalone/fpe v1.2.1
diff --git a/go.sum b/go.sum
index 520fda7b..a48f305c 100644
--- a/go.sum
+++ b/go.sum
@@ -1,3 +1,5 @@
+github.com/CGI-FR/xixo v0.1.7 h1:Qg6UqO6jiKCMAJeRbw3PFC48OBDgGpm5Em7oS5yXzDs=
+github.com/CGI-FR/xixo v0.1.7/go.mod h1:Q7Xf6CHqoU6hyRwPtvrUu4wCspfFYxIWZoYXTYXvtI8=
 github.com/Masterminds/goutils v1.1.1 h1:5nUrii3FMTL5diU80unEVvNevw1nH4+ZV4DSLVJLSYI=
 github.com/Masterminds/goutils v1.1.1/go.mod h1:8cTjp+g8YejhMuvIA5y2vz3BpJxksy863GQaJW2MFNU=
 github.com/Masterminds/semver/v3 v3.2.0 h1:3MEsd0SM6jqZojhjLWWeBY+Kcjy9i6MQAeY7YgDP83g=
diff --git a/internal/app/pimo/pimo.go b/internal/app/pimo/pimo.go
index 0ae72e46..230f97bb 100755
--- a/internal/app/pimo/pimo.go
+++ b/internal/app/pimo/pimo.go
@@ -77,6 +77,7 @@ type Config struct {
 	SkipLogFile      string
 	CachesToDump     map[string]string
 	CachesToLoad     map[string]string
+	XMLCallback      bool
 }
 
 type Context struct {
@@ -107,6 +108,9 @@ func (ctx *Context) Configure(cfg Config) error {
 	over.AddGlobalFields("context")
 
 	switch {
+	case cfg.XMLCallback:
+		over.MDC().Set("context", "callback-input")
+		ctx.source = model.NewCallableMapSource()
 	case cfg.EmptyInput:
 		over.MDC().Set("context", "empty-input")
 		ctx.source = model.NewSourceFromSlice([]model.Dictionary{model.NewPackedDictionary()})
@@ -331,3 +335,40 @@ func updateContext(counter int) {
 	context := over.MDC().GetString("context")
over.MDC().Set("context", re.ReplaceAllString(context, fmt.Sprintf("[%d]", counter)))
 }
+
+// ExecuteMap masks one flat record through the pipeline. It fails when the
+// context was not configured with XMLCallback, when the pipeline yields no
+// result, or when a masked value is not a string.
+func (ctx *Context) ExecuteMap(data map[string]string) (map[string]string, error) {
+	source, ok := ctx.source.(*model.CallableMapSource)
+	if !ok {
+		return nil, fmt.Errorf("source is not a CallableMapSource")
+	}
+	input := model.NewDictionary()
+	for k, v := range data {
+		input = input.With(k, v)
+	}
+	source.SetValue(input)
+
+	var result []model.Entry
+	if err := ctx.pipeline.AddSink(model.NewSinkToSlice(&result)).Run(); err != nil {
+		return nil, err
+	}
+	if len(result) == 0 {
+		return nil, fmt.Errorf("pipeline produced no result")
+	}
+	masked, ok := result[0].(model.Dictionary)
+	if !ok {
+		return nil, fmt.Errorf("result is not a Dictionary but %T", result[0])
+	}
+	unordered := masked.Unordered()
+	newData := make(map[string]string, len(unordered))
+	for k, v := range unordered {
+		stringValue, ok := v.(string)
+		if !ok {
+			return nil, fmt.Errorf("masked value of %q is not a string but %T", k, v)
+		}
+		newData[k] = stringValue
+	}
+	return newData, nil
+}
diff --git a/internal/app/pimo/pimo_test.go b/internal/app/pimo/pimo_test.go
index e61ccae8..d7bba324 100755
--- a/internal/app/pimo/pimo_test.go
+++ b/internal/app/pimo/pimo_test.go
@@ -318,3 +318,73 @@ func LoadJsonLineFromDocument(filename string) (model.Dictionary, error) {
 	//
 	return jsonline.JSONToDictionary(compactLine.Bytes())
 }
+
+// Test2BaliseIdentity checks that two successive ExecuteMap calls on the same
+// context produce different names for the same input record.
+func Test2BaliseIdentity(t *testing.T) {
+	definition := model.Definition{
+		Version: "1",
+		Seed:    42,
+		Masking: []model.Masking{
+			{
+				Selector: model.SelectorType{Jsonpath: "name"},
+				Mask: model.MaskType{
+					RandomChoiceInURI: "pimo://nameFR",
+				},
+			},
+		},
+	}
+	ctx := pimo.NewContext(definition)
+	cfg := pimo.Config{
+		Iteration:   1,
+		XMLCallback: true,
+	}
+
+	err := ctx.Configure(cfg)
+	assert.Nil(t, err)
+
+	data := map[string]string{"name": "John"}
+	newData1, err := ctx.ExecuteMap(data)
+	assert.Nil(t, err)
+	newData2, err := ctx.ExecuteMap(data)
+	assert.Nil(t, err)
+	assert.NotEqual(t, newData2["name"], newData1["name"])
+}
+
+// TestExecuteMapWithAttributes checks that ExecuteMap masks both an element
+// value ("name") and an attribute value ("name@age").
+func TestExecuteMapWithAttributes(t *testing.T) {
+	definition := model.Definition{
+		Version: "1",
+		Masking: []model.Masking{
+			{
+				Selector: model.SelectorType{Jsonpath: "name"},
+				Mask: model.MaskType{
+					HashInURI: "pimo://nameFR",
+				},
+			},
+			{
+				Selector: model.SelectorType{Jsonpath: "name@age"},
+				Mask: model.MaskType{
+					Regex: "([0-9]){2}",
+				},
+			},
+		},
+	}
+	ctx := pimo.NewContext(definition)
+	cfg := pimo.Config{
+		Iteration:   1,
+		XMLCallback: true,
+	}
+
+	err := ctx.Configure(cfg)
+	assert.Nil(t, err)
+
+	data := map[string]string{"name": "John", "name@age": "25"}
+
+	newData, err := ctx.ExecuteMap(data)
+
+	assert.Nil(t, err)
+	assert.NotEqual(t, "John", newData["name"])
+	assert.NotEqual(t, "25", newData["name@age"])
+}
diff --git a/internal/app/pimo/xixo.go b/internal/app/pimo/xixo.go
new file mode 100644
index 00000000..ed21607e
--- /dev/null
+++ b/internal/app/pimo/xixo.go
@@ -0,0 +1,28 @@
+// Copyright (C) 2022 CGI France
+//
+// This file is part of PIMO.
+//
+// PIMO is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// PIMO is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with PIMO.  If not, see <http://www.gnu.org/licenses/>.
+
+package pimo
+
+import (
+	"io"
+
+	"github.com/CGI-FR/xixo/pkg/xixo"
+)
+
+func ParseXML(input io.Reader, output io.Writer) *xixo.XMLParser {
+	return xixo.NewXMLParser(input, output).EnableXpath()
+}
diff --git a/pkg/model/model.go b/pkg/model/model.go
index d5283a35..9d9c024c 100755
--- a/pkg/model/model.go
+++ b/pkg/model/model.go
@@ -668,3 +668,42 @@ func NewSeeder(sourceField string, seed int64) Seeder {
 	}
 	return seeder
 }
+
+// CallableMapSource is a source that holds at most one pending entry, handed
+// over by SetValue and consumed by the following call to Next.
+type CallableMapSource struct {
+	value     Entry
+	nextValue Entry
+}
+
+// NewCallableMapSource returns a source with no pending entry.
+func NewCallableMapSource() *CallableMapSource {
+	return new(CallableMapSource)
+}
+
+// Open always succeeds; there is nothing to open.
+func (s *CallableMapSource) Open() error {
+	return nil
+}
+
+// SetValue stores value as the entry the next call to Next will expose.
+func (s *CallableMapSource) SetValue(value Entry) {
+	s.nextValue = value
+}
+
+// Next makes the pending entry current and clears the pending slot. It
+// reports whether a non-nil entry is now available through Value.
+func (s *CallableMapSource) Next() bool {
+	s.value, s.nextValue = s.nextValue, nil
+	return s.value != nil
+}
+
+// Value returns the entry made current by the last call to Next.
+func (s *CallableMapSource) Value() Entry {
+	return s.value
+}
+
+// Err always returns nil.
+func (s *CallableMapSource) Err() error {
+	return nil
+}
diff --git a/pkg/model/model_test.go b/pkg/model/model_test.go
index 4d3ac6c0..0069d52c 100755
--- a/pkg/model/model_test.go
+++ b/pkg/model/model_test.go
@@ -580,3 +580,29 @@ func TestCacheShouldProvide(t *testing.T) {
 
 	assert.Equal(t, wanted, result)
 }
+
+// The constructor must hand back a non-nil source.
+func TestCallableMapSource(t *testing.T) {
+	assert.NotNil(t, NewCallableMapSource())
+}
+
+// Without a pending entry, Next has nothing to expose.
+func TestCallableMapSourceNextShouldReturnFalseBeforeValueIsSetted(t *testing.T) {
+	src := NewCallableMapSource()
+	assert.False(t, src.Next())
+}
+
+// A pending entry is exposed by the first call to Next.
+func TestCallableMapSourceNextShouldReturnTrueAfterValueIsSetted(t *testing.T) {
+	src := NewCallableMapSource()
+	src.SetValue(NewDictionary())
+	assert.True(t, src.Next())
+}
+
+// A pending entry is consumed by Next and is not exposed a second time.
+func TestCallableMapSourceNextShouldReturnFalseAfterNextCalledTwice(t *testing.T) {
+	src := NewCallableMapSource()
+	src.SetValue(NewDictionary())
+	assert.True(t, src.Next())
+	assert.False(t, src.Next())
+}
diff --git a/test/exemple.xml
b/test/exemple.xml new file mode 100644 index 00000000..e3ad5df0 --- /dev/null +++ b/test/exemple.xml @@ -0,0 +1,26 @@ + + + + Nantes Agency + 0032 + + + Doe John + 12345 + 50000 + 10000 + + + Smith Jane + 67890 + 60000 + 12000 + + + Hello world + + 00000 + 60000 + 12000 + + diff --git a/test/exemple_expected.xml b/test/exemple_expected.xml new file mode 100644 index 00000000..d3b6739e --- /dev/null +++ b/test/exemple_expected.xml @@ -0,0 +1,25 @@ + + + + Nantes Agency + 2308 + + + Rolande + 1 + 50000 + 10000 + + + Matéo + 2 + 60000 + 12000 + + + Rosalie + 3 + 60000 + 12000 + + diff --git a/test/masking_account.yml b/test/masking_account.yml new file mode 100644 index 00000000..101b3e32 --- /dev/null +++ b/test/masking_account.yml @@ -0,0 +1,15 @@ +version: "1" +seed: 42 + +masking: + - selector: + jsonpath: "name" + mask: + randomChoiceInUri: "pimo://nameFR" + - selector: + jsonpath: "account_number" + masks: + - incremental: + start: 1 + increment: 1 + - template: "{{.account_number}}" diff --git a/test/masking_agency.yml b/test/masking_agency.yml new file mode 100644 index 00000000..6e4aac49 --- /dev/null +++ b/test/masking_agency.yml @@ -0,0 +1,8 @@ +version: "1" +seed: 42 + +masking: + - selector: + jsonpath: "agency_number" + mask: + template: '{{MaskRegex "[0-9]{4}$"}}' diff --git a/test/masking_attr.yml b/test/masking_attr.yml new file mode 100644 index 00000000..44616c55 --- /dev/null +++ b/test/masking_attr.yml @@ -0,0 +1,18 @@ +version: "1" +seed: 42 +masking: + - selector: + jsonpath: "name" + mask: + randomChoiceInUri: "pimo://nameFR" + - selector: + jsonpath: "@name" + mask: + randomChoiceInUri: "pimo://nameFR" + - selector: + jsonpath: "name@age" + masks: + - randomInt: + min: 18 + max: 95 + - template: "{{index . 
\"name@age\"}}" diff --git a/test/suites/parsingXML.yml b/test/suites/parsingXML.yml new file mode 100755 index 00000000..1c4448b8 --- /dev/null +++ b/test/suites/parsingXML.yml @@ -0,0 +1,55 @@ +name: parsing/masking XML features +testcases: + - name: masking XML one parent tag + steps: + - script: |- + cat > origin.xml < + + Bar + + EOF + - script: |- + cat > expected.xml < + + Rolande + + EOF + - script: |- + cat origin.xml | pimo xml --subscriber User=../masking_attr.yml > result.xml + assertions: + - result.code ShouldEqual 0 + - script: diff expected.xml result.xml + assertions: + - result.systemout ShouldBeEmpty + - script: rm -f origin.xml + - script: rm -f expected.xml + - script: rm -f result.xml + + - name: masking XML one parent tag with a given seed from cli + steps: + - script: |- + cat > origin.xml < + + Bar + + EOF + - script: |- + cat > expected.xml < + + Zacharie + + EOF + - script: |- + cat origin.xml | pimo xml --seed 41 --subscriber User=../masking_attr.yml > result.xml + assertions: + - result.code ShouldEqual 0 + - script: diff expected.xml result.xml + assertions: + - result.systemout ShouldBeEmpty + - script: rm -f origin.xml + - script: rm -f expected.xml + - script: rm -f result.xml