From 0f601a7c5236740329dc3ef0cd34efee30e20591 Mon Sep 17 00:00:00 2001 From: Thomas Legris Date: Mon, 1 Jul 2024 15:44:09 +0900 Subject: [PATCH] remove hyperscan --- go.mod | 1 - go.sum | 10 ---- main.go | 2 +- signature/hs_pattens.go | 104 ---------------------------------------- signature/signatures.go | 73 ++++++++++++++++------------ 5 files changed, 44 insertions(+), 146 deletions(-) delete mode 100644 signature/hs_pattens.go diff --git a/go.mod b/go.mod index 8c1f6a6..1d91bfd 100644 --- a/go.mod +++ b/go.mod @@ -12,7 +12,6 @@ require ( github.com/deepfence/golang_deepfence_sdk/utils v0.0.0-20231201173641-092afefd00a2 github.com/deepfence/match-scanner v0.0.0-20240627065846-d2405fb72cfb github.com/fatih/color v1.16.0 - github.com/flier/gohs v1.2.2 github.com/olekukonko/tablewriter v0.0.5 github.com/sirupsen/logrus v1.9.3 google.golang.org/grpc v1.63.2 diff --git a/go.sum b/go.sum index a5ef52d..ef6043b 100644 --- a/go.sum +++ b/go.sum @@ -60,8 +60,6 @@ github.com/fatih/color v1.16.0 h1:zmkK9Ngbjj+K0yRhTVONQh1p/HknKYSlNT+vZCzyokM= github.com/fatih/color v1.16.0/go.mod h1:fL2Sau1YI5c0pdGEVCbKQbLXB6edEj1ZgiY4NijnWvE= github.com/felixge/httpsnoop v1.0.4 h1:NFTV2Zj1bL4mc9sqWACXbQFVBBg2W3GPvqp8/ESS2Wg= github.com/felixge/httpsnoop v1.0.4/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U= -github.com/flier/gohs v1.2.2 h1:v1Pmzvv/PgYoJhmOHadKjKr0wpudb20WcF1ZF0miiM8= -github.com/flier/gohs v1.2.2/go.mod h1:YZaZuBeDNoFW94B4j+YFo7Lv3XlkwNm9vsOvk0E3kgY= github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= github.com/go-logr/logr v1.4.1 h1:pKouT5E8xu9zeFC39JXRDukb6JFQPXM5p5I91188VAQ= github.com/go-logr/logr v1.4.1/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= @@ -96,8 +94,6 @@ github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeN github.com/google/uuid v1.1.2/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= -github.com/gopherjs/gopherjs v1.17.2 h1:fQnZVsXk8uxXIStYb0N4bGk7jeyTalG/wsZjQ25dO0g= -github.com/gopherjs/gopherjs v1.17.2/go.mod h1:pRRIvn/QzFLrKfvEz3qUuEhtE/zLCWfreZ6J5gM2i+k= github.com/grpc-ecosystem/grpc-gateway/v2 v2.16.0 h1:YBftPWNWd4WwGqtY2yeZL2ef8rHAxPBD8KFhJpmcqms= github.com/grpc-ecosystem/grpc-gateway/v2 v2.16.0/go.mod h1:YN5jB8ie0yfIUg6VvR9Kz84aCaG7AsGZnLjhHbUqwPg= github.com/hashicorp/go-cleanhttp v0.5.2 h1:035FKYIWjmULyFRBKPs8TBQoi0x6d9G4xc9neXJWAZQ= @@ -106,8 +102,6 @@ github.com/hashicorp/go-hclog v0.9.2 h1:CG6TE5H9/JXsFWJCfoIVpKFIkFe6ysEuHirp4DxC github.com/hashicorp/go-hclog v0.9.2/go.mod h1:5CU+agLiy3J7N7QjHK5d05KxGsuXiQLrjA0H7acj2lQ= github.com/hashicorp/go-retryablehttp v0.7.5 h1:bJj+Pj19UZMIweq/iie+1u5YCdGrnxCT9yvm0e+Nd5M= github.com/hashicorp/go-retryablehttp v0.7.5/go.mod h1:Jy/gPYAdjqffZ/yFGCFV2doI5wjtH1ewM9u8iYVjtX8= -github.com/jtolds/gls v4.20.0+incompatible h1:xdiiI2gbIgH/gLH7ADydsJ1uDOEzR8yvV7C0MuV77Wo= -github.com/jtolds/gls v4.20.0+incompatible/go.mod h1:QJZ7F/aHp+rZTRtaJ1ow/lLfFfVYBRgL+9YlvaHOwJU= github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8= github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= github.com/klauspost/compress v1.17.8 h1:YcnTYrq7MikUT7k0Yb5eceMmALQPYBW/Xltxn0NAMnU= @@ -156,10 +150,6 @@ github.com/prometheus/procfs v0.8.0 h1:ODq8ZFEaYeCaZOJlZZdJA2AbQR98dSHSM1KW/You5 github.com/prometheus/procfs v0.8.0/go.mod h1:z7EfXMXOkbkqb9IINtpCn86r/to3BnA0uaxHdg830/4= github.com/sirupsen/logrus v1.9.3 h1:dueUQJ1C2q9oE3F7wvmSGAaVtTmUizReu6fjN8uqzbQ= github.com/sirupsen/logrus v1.9.3/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ= -github.com/smartystreets/assertions v1.13.1 h1:Ef7KhSmjZcK6AVf9YbJdvPYG9avaF0ZxudX+ThRdWfU= -github.com/smartystreets/assertions v1.13.1/go.mod h1:cXr/IwVfSo/RbCSPhoAPv73p3hlSdrBH/b3SdnW/LMY= -github.com/smartystreets/goconvey v1.8.0 h1:Oi49ha/2MURE0WexF052Z0m+BNSGirfjg5RL+JXWq3w= -github.com/smartystreets/goconvey v1.8.0/go.mod h1:EdX8jtrTIj26jmjCOVNMVSIYAtgexqXKHOXW2Dx9JLg= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= diff --git a/main.go b/main.go index 9f550f7..9acd604 100644 --- a/main.go +++ b/main.go @@ -166,7 +166,7 @@ func main() { signature.ProcessSignatures(session.Config.Signatures) // Build Hyperscan database for fast scanning - signature.BuildHsDb() + signature.BuildRegexes() flag.Parse() diff --git a/signature/hs_pattens.go b/signature/hs_pattens.go deleted file mode 100644 index e4e228d..0000000 --- a/signature/hs_pattens.go +++ /dev/null @@ -1,104 +0,0 @@ -package signature - -import ( - "fmt" - "os" - - "github.com/deepfence/SecretScanner/core" - "github.com/flier/gohs/hyperscan" - log "github.com/sirupsen/logrus" -) - -// Build hyperscan Databases for matching different parts in the beginning -// This can be used for repeated scanning -func BuildHsDb() { - for _, part := range []string{ContentsPart, FilenamePart, PathPart, ExtPart} { - log.Debugf("Creating hyperscan database for %s", part) - hspatterns, err := CreateHsPatterns(part) - if err != nil { - fmt.Fprintf(os.Stderr, "ERROR: Unable to create patterns \"%s\": %s\n", part, err.Error()) - os.Exit(1) - } - hyperscanBlockDbMap[part] = CreateHsDb(hspatterns) - } -} - -// Create a list of hyperscan patterns with appropriate flags -// @parameters -// part - part for which list of patterns to be created: content, path, filename or extension -// @returns -// []*hyperscan.Pattern - List of hyperscan patterns -// error - Errors if any. Otherwise, returns nil -func CreateHsPatterns(part string) ([]*hyperscan.Pattern, error) { - var hsPatterns []*hyperscan.Pattern - - log.Debugf("Number of Complex Patterns for matching %s: %d", part, len(patternSignatureMap[part])) - for _, signature := range patternSignatureMap[part] { - log.Debugf("Pattern Signature %s %s %s %s %s %s %d", signature.Name, signature.Part, signature.Match, signature.Regex, signature.RegexType, signature.Severity, signature.ID) - - // Disable SomLeftMost option for large regex to avoid HS compilation failures. - // Postprocess later to find patterns - hsPattern := hyperscan.NewPattern(signature.Regex, hyperscan.DotAll|hyperscan.SomLeftMost) // hyperscan.SingleMatch - if signature.RegexType == LargeRegexType { - hsPattern = hyperscan.NewPattern(signature.Regex, hyperscan.DotAll) - if *core.GetSession().Options.MultipleMatch == false { - hsPattern = hyperscan.NewPattern(signature.Regex, hyperscan.DotAll|hyperscan.SingleMatch) - } else { - hsPattern = hyperscan.NewPattern(signature.Regex, hyperscan.DotAll) - } - } - hsPattern.Id = signature.ID - hsPatterns = append(hsPatterns, hsPattern) - } - return hsPatterns, nil -} - -// Create Hyperscan databased, which can be used for repeated scanning -// @parameters -// hsPatterns - List of hyperscan patterns -// @returns -// BlockDatabase - Hyperscan database for the given list of patterns -func CreateHsDb(hsPatterns []*hyperscan.Pattern) hyperscan.BlockDatabase { - hyperscanBlockDb, err := hyperscan.NewBlockDatabase(hsPatterns...) - if err != nil { - log.Error("ERROR: Unable to compile pattern ", err.Error()) - os.Exit(1) - } - return hyperscanBlockDb -} - -// Run hyperscan matching on the specified content -// @parameters -// hyperscanBlockDb - Hyperscan database of a list of patterns -// hsIOData - Metadata containing the contents being matched, filename, layerID etc. -// @returns -// Error - Errors if any. Otherwise, returns nil -func RunHyperscan(hyperscanBlockDb hyperscan.BlockDatabase, hsIOData HsInputOutputData) error { - hyperscanScratch, err := hyperscan.NewScratch(hyperscanBlockDb) - if err != nil { - return err - } - defer hyperscanScratch.Free() - - metadata := hsIOData - if err := hyperscanBlockDb.Scan([]byte(metadata.inputData), hyperscanScratch, hyperscanEventHandler, metadata); err != nil { - log.Infof("First 100 bytes of inputData: %s", metadata.inputData[:Min(len(metadata.inputData), 100)]) - log.Warnf("RunHyperscan: %s", err) - return err - } - return nil -} - -// This is the function that will be called by hyperscan for each match that occurs. -// @parameters -// id - ID of matched rule -// from - Start index of the match -// to - End endex of the match -// flags - This is provided by hyperscan for future use and is unused at present. -// context - Metadata containing the contents being matched, filename, layerID etc. -// @returns -// error - Errors if any. Otherwise, returns nil -func hyperscanEventHandler(id uint, from, to uint64, flags uint, context interface{}) error { - err := processHsRegexMatch(id, from, to, flags, context) - return err -} diff --git a/signature/signatures.go b/signature/signatures.go index 7953d12..a39db65 100644 --- a/signature/signatures.go +++ b/signature/signatures.go @@ -15,7 +15,6 @@ import ( "github.com/deepfence/SecretScanner/core" "github.com/deepfence/SecretScanner/output" "github.com/fatih/color" - "github.com/flier/gohs/hyperscan" log "github.com/sirupsen/logrus" ) @@ -30,7 +29,6 @@ const ( MaxSecretLength = 1000 // Maximum length of secret to search to find exact position of secrets in large regex patterns ) -// Data structure for passing inputs and getting outputs for hyperscan type HsInputOutputData struct { inputData []byte // Avoids extra memory during blacklist comparison, reduces memory pressure @@ -46,8 +44,7 @@ type HsInputOutputData struct { var ( simpleSignatureMap map[string][]core.ConfigSignature patternSignatureMap map[string][]core.ConfigSignature - hyperscanBlockDbMap map[string]hyperscan.BlockDatabase - regexpMap map[string]*regexp.Regexp + regexpMap map[string][]*regexp.Regexp signatureIDMap map[int]core.ConfigSignature ) @@ -56,8 +53,7 @@ func init() { // log.Infof("Initializing Patterns....") simpleSignatureMap = make(map[string][]core.ConfigSignature) patternSignatureMap = make(map[string][]core.ConfigSignature) - hyperscanBlockDbMap = make(map[string]hyperscan.BlockDatabase) - regexpMap = make(map[string]*regexp.Regexp) + regexpMap = make(map[string][]*regexp.Regexp) signatureIDMap = make(map[int]core.ConfigSignature) } @@ -136,31 +132,27 @@ func MatchPatternSignatures(contents io.ReadSeeker, path string, filename string // numSecrets: numSecrets, // matchedRuleSet: matchedRuleSet, //} - indexes := regexpMap[matchingPart].FindReaderSubmatchIndex(matchingStr) - if indexes != nil { - tempSecretsFound = append(tempSecretsFound, output.SecretFound{ - LayerID: layerID, - RuleID: 0, - RuleName: "", - PartToMatch: part, - Match: matchingPart[indexes[0]:indexes[1]], - Regex: regexpMap[matchingPart].String(), - Severity: "", - SeverityScore: 0, - PrintBufferStartIndex: 0, - MatchFromByte: 0, - MatchToByte: 0, - CompleteFilename: filename, - MatchedContents: "", - }) + for _, regex := range regexpMap[matchingPart] { + indexes := regex.FindReaderSubmatchIndex(matchingStr) + if indexes != nil { + tempSecretsFound = append(tempSecretsFound, output.SecretFound{ + LayerID: layerID, + RuleID: 0, + RuleName: "", + PartToMatch: part, + Match: matchingPart[indexes[0]:indexes[1]], + Regex: regex.String(), + Severity: "", + SeverityScore: 0, + PrintBufferStartIndex: 0, + MatchFromByte: 0, + MatchToByte: 0, + CompleteFilename: filename, + MatchedContents: "", + }) + break + } } - //err := RunHyperscan(hyperscanBlockDbMap[matchingPart], hsIOData) - //if err != nil { - // log.Infof("part: %s, path: %s, filename: %s, extenstion: %s, layerID: %s", - // part, path, filename, extension, layerID) - // log.Warnf("MatchPatternSignatures: %s", err) - // return tempSecretsFound, err - //} } return tempSecretsFound, nil @@ -506,3 +498,24 @@ func Max(value_0, value_1 int) int { } return value_1 } + +func BuildRegexes() { + for _, part := range []string{ContentsPart, FilenamePart, PathPart, ExtPart} { + log.Debugf("Creating hyperscan database for %s", part) + regexpMap[part] = CreateRegexpPatterns(part) + } +} + +func CreateRegexpPatterns(part string) []*regexp.Regexp { + var regPatterns []*regexp.Regexp + + log.Debugf("Number of Complex Patterns for matching %s: %d", part, len(patternSignatureMap[part])) + for _, signature := range patternSignatureMap[part] { + log.Debugf("Pattern Signature %s %s %s %s %s %s %d", signature.Name, signature.Part, signature.Match, signature.Regex, signature.RegexType, signature.Severity, signature.ID) + + pattern := regexp.MustCompile(signature.Regex) + //hsPattern.Id = signature.ID + regPatterns = append(regPatterns, pattern) + } + return regPatterns +}