From 068ae006fe9ff2ea0f15b3eb395f868fb2d739ec Mon Sep 17 00:00:00 2001 From: MinerYang Date: Fri, 10 May 2024 17:17:47 +0800 Subject: [PATCH 1/3] Update scan job request log for enabled_capabilities (#20414) update scan job request log Signed-off-by: yminer --- src/pkg/scan/job.go | 1 + 1 file changed, 1 insertion(+) diff --git a/src/pkg/scan/job.go b/src/pkg/scan/job.go index 171e0c307429..c386628724c7 100644 --- a/src/pkg/scan/job.go +++ b/src/pkg/scan/job.go @@ -419,6 +419,7 @@ func removeScanAuthInfo(sr *v1.ScanRequest) string { URL: sr.Registry.URL, Authorization: "[HIDDEN]", }, + RequestType: sr.RequestType, } str, err := req.ToJSON() From 65e266fecf7001f32018e2ef9878d620520e4ba4 Mon Sep 17 00:00:00 2001 From: Wang Yan Date: Mon, 13 May 2024 14:44:51 +0800 Subject: [PATCH 2/3] fix issue 20407 (#20416) fixes #20407 It needs to specify the insecure option on parsing the reference Signed-off-by: wang yan --- src/pkg/scan/rest/v1/models.go | 2 ++ src/pkg/scan/sbom/sbom.go | 13 +++++++------ src/pkg/scan/sbom/sbom_test.go | 4 ++-- src/pkg/scan/util.go | 3 +++ 4 files changed, 14 insertions(+), 8 deletions(-) diff --git a/src/pkg/scan/rest/v1/models.go b/src/pkg/scan/rest/v1/models.go index 9c25c16ea76e..06e6fb0a1c77 100644 --- a/src/pkg/scan/rest/v1/models.go +++ b/src/pkg/scan/rest/v1/models.go @@ -206,6 +206,8 @@ type Registry struct { // An optional value of the HTTP Authorization header sent with each request to the Docker Registry for getting or exchanging token. // For example, `Basic: Base64(username:password)`. Authorization string `json:"authorization"` + // Insecure is an indicator of https or http. + Insecure bool `json:"insecure"` } // ScanRequest represents a structure that is sent to a Scanner Adapter to initiate artifact scanning. diff --git a/src/pkg/scan/sbom/sbom.go b/src/pkg/scan/sbom/sbom.go index f8e6d2e43e8a..9a819d1236ff 100644 --- a/src/pkg/scan/sbom/sbom.go +++ b/src/pkg/scan/sbom/sbom.go @@ -43,13 +43,13 @@ const ( ) func init() { - scan.RegisterScanHanlder(v1.ScanTypeSbom, &scanHandler{GenAccessoryFunc: scan.GenAccessoryArt, RegistryServer: registryFQDN}) + scan.RegisterScanHanlder(v1.ScanTypeSbom, &scanHandler{GenAccessoryFunc: scan.GenAccessoryArt, RegistryServer: registry}) } // ScanHandler defines the Handler to generate sbom type scanHandler struct { GenAccessoryFunc func(scanRep v1.ScanRequest, sbomContent []byte, labels map[string]string, mediaType string, robot *model.Robot) (string, error) - RegistryServer func(ctx context.Context) string + RegistryServer func(ctx context.Context) (string, bool) } // RequestProducesMineTypes defines the mine types produced by the scan handler @@ -96,7 +96,7 @@ func (v *scanHandler) PostScan(ctx job.Context, sr *v1.ScanRequest, _ *scanModel Artifact: sr.Artifact, } // the registry server url is core by default, need to replace it with real registry server url - scanReq.Registry.URL = v.RegistryServer(ctx.SystemContext()) + scanReq.Registry.URL, scanReq.Registry.Insecure = v.RegistryServer(ctx.SystemContext()) if len(scanReq.Registry.URL) == 0 { return "", fmt.Errorf("empty registry server") } @@ -139,15 +139,16 @@ func (v *scanHandler) generateReport(startTime time.Time, repository, digest, st } // extract server name from config, and remove the protocol prefix -func registryFQDN(ctx context.Context) string { +func registry(ctx context.Context) (string, bool) { cfgMgr, ok := config.FromContext(ctx) if ok { extURL := cfgMgr.Get(context.Background(), common.ExtEndpoint).GetString() + insecure := strings.HasPrefix(extURL, "http://") server := strings.TrimPrefix(extURL, "https://") server = strings.TrimPrefix(server, "http://") - return server + return server, insecure } - return "" + return "", false } // retrieveSBOMContent retrieves the "sbom" field from the raw report diff --git a/src/pkg/scan/sbom/sbom_test.go b/src/pkg/scan/sbom/sbom_test.go index cf56b3bbb689..c1e0cd9721c5 100644 --- a/src/pkg/scan/sbom/sbom_test.go +++ b/src/pkg/scan/sbom/sbom_test.go @@ -89,8 +89,8 @@ func Test_scanHandler_RequestProducesMineTypes(t *testing.T) { } } -func mockGetRegistry(ctx context.Context) string { - return "myharbor.example.com" +func mockGetRegistry(ctx context.Context) (string, bool) { + return "myharbor.example.com", false } func mockGenAccessory(scanRep v1.ScanRequest, sbomContent []byte, labels map[string]string, mediaType string, robot *model.Robot) (string, error) { diff --git a/src/pkg/scan/util.go b/src/pkg/scan/util.go index efec3eb61360..e66e657f403e 100644 --- a/src/pkg/scan/util.go +++ b/src/pkg/scan/util.go @@ -86,6 +86,9 @@ func GenAccessoryArt(sq v1sq.ScanRequest, accData []byte, accAnnotations map[str return "", err } accRef, err := name.ParseReference(fmt.Sprintf("%s/%s@%s", sq.Registry.URL, sq.Artifact.Repository, dgst.String())) + if sq.Registry.Insecure { + accRef, err = name.ParseReference(fmt.Sprintf("%s/%s@%s", sq.Registry.URL, sq.Artifact.Repository, dgst.String()), name.Insecure) + } if err != nil { return "", err } From 232f9ba7eaa4a0a972d99d4b04542d5e8f23e51d Mon Sep 17 00:00:00 2001 From: "stonezdj(Daojun Zhang)" Date: Mon, 13 May 2024 17:12:04 +0800 Subject: [PATCH 3/3] Skip scan in-toto sbom artifact (#20415) fixes #20337 Signed-off-by: stonezdj Co-authored-by: Wang Yan --- src/controller/artifact/controller.go | 25 ++++++++++++++++- src/controller/artifact/controller_test.go | 24 ++++++++++++++++ src/controller/scan/base_controller.go | 15 ++++++++-- src/controller/scan/base_controller_test.go | 8 ++++-- src/controller/scan/checker.go | 12 ++++++++ src/controller/scan/checker_test.go | 3 +- src/testing/controller/artifact/controller.go | 28 +++++++++++++++++++ 7 files changed, 108 insertions(+), 7 deletions(-) diff --git a/src/controller/artifact/controller.go b/src/controller/artifact/controller.go index 6db99fccb08b..45f6e4f59c24 100644 --- a/src/controller/artifact/controller.go +++ b/src/controller/artifact/controller.go @@ -58,7 +58,10 @@ import ( var ( // Ctl is a global artifact controller instance - Ctl = NewController() + Ctl = NewController() + skippedContentTypes = map[string]struct{}{ + "application/vnd.in-toto+json": {}, + } ) var ( @@ -113,6 +116,8 @@ type Controller interface { RemoveLabel(ctx context.Context, artifactID int64, labelID int64) (err error) // Walk walks the artifact tree rooted at root, calling walkFn for each artifact in the tree, including root. Walk(ctx context.Context, root *Artifact, walkFn func(*Artifact) error, option *Option) error + // HasUnscannableLayer check artifact with digest if has unscannable layer + HasUnscannableLayer(ctx context.Context, dgst string) (bool, error) } // NewController creates an instance of the default artifact controller @@ -759,3 +764,21 @@ func (c *controller) populateAccessories(ctx context.Context, art *Artifact) { } art.Accessories = accs } + +// HasUnscannableLayer check if it is a in-toto sbom, if it contains any blob with a content_type is application/vnd.in-toto+json, then consider as in-toto sbom +func (c *controller) HasUnscannableLayer(ctx context.Context, dgst string) (bool, error) { + if len(dgst) == 0 { + return false, nil + } + blobs, err := c.blobMgr.GetByArt(ctx, dgst) + if err != nil { + return false, err + } + for _, b := range blobs { + if _, exist := skippedContentTypes[b.ContentType]; exist { + log.Debugf("the artifact with digest %v is unscannable, because it contains content type: %v", dgst, b.ContentType) + return true, nil + } + } + return false, nil +} diff --git a/src/controller/artifact/controller_test.go b/src/controller/artifact/controller_test.go index ce58b5d280bd..0e6ed458a268 100644 --- a/src/controller/artifact/controller_test.go +++ b/src/controller/artifact/controller_test.go @@ -35,6 +35,7 @@ import ( accessorymodel "github.com/goharbor/harbor/src/pkg/accessory/model" basemodel "github.com/goharbor/harbor/src/pkg/accessory/model/base" "github.com/goharbor/harbor/src/pkg/artifact" + "github.com/goharbor/harbor/src/pkg/blob/models" "github.com/goharbor/harbor/src/pkg/label/model" repomodel "github.com/goharbor/harbor/src/pkg/repository/model" model_tag "github.com/goharbor/harbor/src/pkg/tag/model/tag" @@ -678,6 +679,29 @@ func (c *controllerTestSuite) TestWalk() { } } +func (c *controllerTestSuite) TestIsIntoto() { + blobs := []*models.Blob{ + {Digest: "sha256:00000", ContentType: "application/vnd.oci.image.manifest.v1+json"}, + {Digest: "sha256:22222", ContentType: "application/vnd.oci.image.config.v1+json"}, + {Digest: "sha256:11111", ContentType: "application/vnd.in-toto+json"}, + } + c.blobMgr.On("GetByArt", mock.Anything, mock.Anything).Return(blobs, nil).Once() + isIntoto, err := c.ctl.HasUnscannableLayer(context.Background(), "sha256: 77777") + c.Nil(err) + c.True(isIntoto) + + blobs2 := []*models.Blob{ + {Digest: "sha256:00000", ContentType: "application/vnd.oci.image.manifest.v1+json"}, + {Digest: "sha256:22222", ContentType: "application/vnd.oci.image.config.v1+json"}, + {Digest: "sha256:11111", ContentType: "application/vnd.oci.image.layer.v1.tar+gzip"}, + } + + c.blobMgr.On("GetByArt", mock.Anything, mock.Anything).Return(blobs2, nil).Once() + isIntoto2, err := c.ctl.HasUnscannableLayer(context.Background(), "sha256: 8888") + c.Nil(err) + c.False(isIntoto2) +} + func TestControllerTestSuite(t *testing.T) { suite.Run(t, &controllerTestSuite{}) } diff --git a/src/controller/scan/base_controller.go b/src/controller/scan/base_controller.go index c70221a0fa59..14f5a29372b5 100644 --- a/src/controller/scan/base_controller.go +++ b/src/controller/scan/base_controller.go @@ -52,7 +52,6 @@ import ( sbomModel "github.com/goharbor/harbor/src/pkg/scan/sbom/model" "github.com/goharbor/harbor/src/pkg/scan/vuln" "github.com/goharbor/harbor/src/pkg/task" - "github.com/goharbor/harbor/src/testing/controller/artifact" ) var ( @@ -111,8 +110,6 @@ type basicController struct { rc robot.Controller // Tag controller tagCtl tag.Controller - // Artifact controller - artCtl artifact.Controller // UUID generator uuid uuidGenerator // Configuration getter func @@ -199,6 +196,18 @@ func (bc *basicController) collectScanningArtifacts(ctx context.Context, r *scan return nil } + // because there are lots of in-toto sbom artifacts in dockerhub and replicated to Harbor, they are considered as image type + // when scanning these type of sbom artifact, the scanner might assume it is image layer with tgz format, and if scanner read the layer with a stream of tgz, + // it fail and close the stream abruptly and cause the pannic in the harbor core log + // to avoid pannic, skip scan the in-toto sbom artifact sbom artifact + unscannable, err := bc.ar.HasUnscannableLayer(ctx, a.Digest) + if err != nil { + return err + } + if unscannable { + return nil + } + supported := hasCapability(r, a) if !supported && a.IsImageIndex() { diff --git a/src/controller/scan/base_controller_test.go b/src/controller/scan/base_controller_test.go index 19a559e0ec57..1cd464891d44 100644 --- a/src/controller/scan/base_controller_test.go +++ b/src/controller/scan/base_controller_test.go @@ -350,6 +350,7 @@ func (suite *ControllerTestSuite) TestScanControllerScan() { { // artifact not provieded suite.Require().Error(suite.c.Scan(context.TODO(), nil)) + mock.OnAnything(suite.ar, "HasUnscannableLayer").Return(false, nil).Times(3) } { @@ -448,6 +449,7 @@ func (suite *ControllerTestSuite) TestScanControllerStop() { // TestScanControllerGetReport ... func (suite *ControllerTestSuite) TestScanControllerGetReport() { + mock.OnAnything(suite.ar, "HasUnscannableLayer").Return(false, nil).Once() ctx := orm.NewContext(nil, &ormtesting.FakeOrmer{}) mock.OnAnything(suite.ar, "Walk").Return(nil).Run(func(args mock.Arguments) { walkFn := args.Get(2).(func(*artifact.Artifact) error) @@ -466,13 +468,13 @@ func (suite *ControllerTestSuite) TestScanControllerGetReport() { // TestScanControllerGetSummary ... func (suite *ControllerTestSuite) TestScanControllerGetSummary() { ctx := orm.NewContext(nil, &ormtesting.FakeOrmer{}) + mock.OnAnything(suite.ar, "HasUnscannableLayer").Return(false, nil).Once() mock.OnAnything(suite.accessoryMgr, "List").Return([]accessoryModel.Accessory{}, nil).Once() mock.OnAnything(suite.ar, "Walk").Return(nil).Run(func(args mock.Arguments) { walkFn := args.Get(2).(func(*artifact.Artifact) error) walkFn(suite.artifact) }).Once() mock.OnAnything(suite.taskMgr, "ListScanTasksByReportUUID").Return(nil, nil).Once() - sum, err := suite.c.GetSummary(ctx, suite.artifact, []string{v1.MimeTypeNativeReport}) require.NoError(suite.T(), err) assert.Equal(suite.T(), 1, len(sum)) @@ -480,6 +482,7 @@ func (suite *ControllerTestSuite) TestScanControllerGetSummary() { // TestScanControllerGetScanLog ... func (suite *ControllerTestSuite) TestScanControllerGetScanLog() { + mock.OnAnything(suite.ar, "HasUnscannableLayer").Return(false, nil).Once() ctx := orm.NewContext(nil, &ormtesting.FakeOrmer{}) mock.OnAnything(suite.taskMgr, "ListScanTasksByReportUUID").Return([]*task.Task{ { @@ -500,6 +503,7 @@ func (suite *ControllerTestSuite) TestScanControllerGetScanLog() { func (suite *ControllerTestSuite) TestScanControllerGetMultiScanLog() { ctx := orm.NewContext(nil, &ormtesting.FakeOrmer{}) + mock.OnAnything(suite.ar, "HasUnscannableLayer").Return(false, nil).Times(4) suite.taskMgr.On("ListScanTasksByReportUUID", ctx, "rp-uuid-001").Return([]*task.Task{ { ID: 1, @@ -562,7 +566,7 @@ func (suite *ControllerTestSuite) TestScanAll() { { // no artifacts found when scan all executionID := int64(1) - + mock.OnAnything(suite.ar, "HasUnscannableLayer").Return(false, nil).Once() suite.execMgr.On( "Create", mock.Anything, "SCAN_ALL", int64(0), "SCHEDULE", mock.Anything).Return(executionID, nil).Once() diff --git a/src/controller/scan/checker.go b/src/controller/scan/checker.go index a4d0e3ba3470..4b429b2bc305 100644 --- a/src/controller/scan/checker.go +++ b/src/controller/scan/checker.go @@ -86,6 +86,18 @@ func (c *checker) IsScannable(ctx context.Context, art *artifact.Artifact) (bool return artifact.ErrBreak } + // because there are lots of in-toto sbom artifacts in dockerhub and replicated to Harbor, they are considered as image type + // when scanning these type of sbom artifact, the scanner might assume it is image layer with tgz format, and if scanner read the layer with a stream of tgz, + // it fail and close the stream abruptly and cause the pannic in the harbor core log + // to avoid pannic, skip scan the in-toto sbom artifact sbom artifact + unscannable, err := c.artifactCtl.HasUnscannableLayer(ctx, a.Digest) + if err != nil { + return err + } + if unscannable { + return nil + } + return nil } diff --git a/src/controller/scan/checker_test.go b/src/controller/scan/checker_test.go index e96cb1e511fd..716e574be06e 100644 --- a/src/controller/scan/checker_test.go +++ b/src/controller/scan/checker_test.go @@ -81,7 +81,7 @@ func (suite *CheckerTestSuite) TestIsScannable() { walkFn := args.Get(2).(func(*artifact.Artifact) error) walkFn(art) }) - + mock.OnAnything(c.artifactCtl, "HasUnscannableLayer").Return(false, nil).Once() isScannable, err := c.IsScannable(context.TODO(), art) suite.Nil(err) suite.False(isScannable) @@ -97,6 +97,7 @@ func (suite *CheckerTestSuite) TestIsScannable() { walkFn := args.Get(2).(func(*artifact.Artifact) error) walkFn(art) }) + mock.OnAnything(c.artifactCtl, "HasUnscannableLayer").Return(false, nil).Once() isScannable, err := c.IsScannable(context.TODO(), art) suite.Nil(err) diff --git a/src/testing/controller/artifact/controller.go b/src/testing/controller/artifact/controller.go index 7af5d556f176..8fe07f29789a 100644 --- a/src/testing/controller/artifact/controller.go +++ b/src/testing/controller/artifact/controller.go @@ -238,6 +238,34 @@ func (_m *Controller) GetByReference(ctx context.Context, repository string, ref return r0, r1 } +// HasUnscannableLayer provides a mock function with given fields: ctx, dgst +func (_m *Controller) HasUnscannableLayer(ctx context.Context, dgst string) (bool, error) { + ret := _m.Called(ctx, dgst) + + if len(ret) == 0 { + panic("no return value specified for HasUnscannableLayer") + } + + var r0 bool + var r1 error + if rf, ok := ret.Get(0).(func(context.Context, string) (bool, error)); ok { + return rf(ctx, dgst) + } + if rf, ok := ret.Get(0).(func(context.Context, string) bool); ok { + r0 = rf(ctx, dgst) + } else { + r0 = ret.Get(0).(bool) + } + + if rf, ok := ret.Get(1).(func(context.Context, string) error); ok { + r1 = rf(ctx, dgst) + } else { + r1 = ret.Error(1) + } + + return r0, r1 +} + // List provides a mock function with given fields: ctx, query, option func (_m *Controller) List(ctx context.Context, query *q.Query, option *artifact.Option) ([]*artifact.Artifact, error) { ret := _m.Called(ctx, query, option)