Skip to content

Commit

Permalink
Merge remote-tracking branch 'upstream/main'
Browse files Browse the repository at this point in the history
* upstream/main:
  Add doctor command for full GC of LFS (go-gitea#21978)
  remove silentcode from MAINTAINERS (go-gitea#22143)
  • Loading branch information
zjjhot committed Dec 16, 2022
2 parents bc43db4 + 651fe4b commit 8dc1017
Show file tree
Hide file tree
Showing 6 changed files with 245 additions and 40 deletions.
1 change: 0 additions & 1 deletion MAINTAINERS
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,6 @@ Janis Estelmann <admin@oldschoolhack.me> (@KN4CK3R)
Steven Kriegler <sk.bunsenbrenner@gmail.com> (@justusbunsi)
Jimmy Praet <jimmy.praet@telenet.be> (@jpraet)
Leon Hofmeister <dev.lh@web.de> (@delvh)
silentcode <silentcode@senga.org> (@silentcodeg)
Wim <wim@42.be> (@42wim)
xinyu <xinyu@nerv.org.cn> (@penlinux)
Jason Song <i@wolfogre.com> (@wolfogre)
Expand Down
54 changes: 54 additions & 0 deletions models/git/lfs.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ package git
import (
"context"
"fmt"
"time"

"code.gitea.io/gitea/models/db"
"code.gitea.io/gitea/models/perm"
Expand All @@ -14,6 +15,7 @@ import (
user_model "code.gitea.io/gitea/models/user"
"code.gitea.io/gitea/modules/lfs"
"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/setting"
"code.gitea.io/gitea/modules/timeutil"
"code.gitea.io/gitea/modules/util"

Expand Down Expand Up @@ -180,6 +182,12 @@ func GetLFSMetaObjectByOid(repoID int64, oid string) (*LFSMetaObject, error) {
// RemoveLFSMetaObjectByOid removes a LFSMetaObject entry from database by its OID.
// It may return ErrLFSObjectNotExist or a database error.
func RemoveLFSMetaObjectByOid(repoID int64, oid string) (int64, error) {
return RemoveLFSMetaObjectByOidFn(repoID, oid, nil)
}

// RemoveLFSMetaObjectByOidFn removes a LFSMetaObject entry from database by its OID.
// It may return ErrLFSObjectNotExist or a database error. It will run Fn with the current count within the transaction
func RemoveLFSMetaObjectByOidFn(repoID int64, oid string, fn func(count int64) error) (int64, error) {
if len(oid) == 0 {
return 0, ErrLFSObjectNotExist
}
Expand All @@ -200,6 +208,12 @@ func RemoveLFSMetaObjectByOid(repoID int64, oid string) (int64, error) {
return count, err
}

if fn != nil {
if err := fn(count); err != nil {
return count, err
}
}

return count, committer.Commit()
}

Expand Down Expand Up @@ -319,3 +333,43 @@ func GetRepoLFSSize(ctx context.Context, repoID int64) (int64, error) {
}
return lfsSize, nil
}

type IterateLFSMetaObjectsForRepoOptions struct {
OlderThan time.Time
}

// IterateLFSMetaObjectsForRepo provides a iterator for LFSMetaObjects per Repo
func IterateLFSMetaObjectsForRepo(ctx context.Context, repoID int64, f func(context.Context, *LFSMetaObject, int64) error, opts *IterateLFSMetaObjectsForRepoOptions) error {
var start int
batchSize := setting.Database.IterateBufferSize
engine := db.GetEngine(ctx)
type CountLFSMetaObject struct {
Count int64
LFSMetaObject
}

for {
beans := make([]*CountLFSMetaObject, 0, batchSize)
// SELECT `lfs_meta_object`.*, COUNT(`l1`.id) as `count` FROM lfs_meta_object INNER JOIN lfs_meta_object AS l1 ON l1.oid = lfs_meta_object.oid WHERE lfs_meta_object.repository_id = ? GROUP BY lfs_meta_object.id
sess := engine.Select("`lfs_meta_object`.*, COUNT(`l1`.oid) AS `count`").
Join("INNER", "`lfs_meta_object` AS l1", "`lfs_meta_object`.oid = `l1`.oid").
Where("`lfs_meta_object`.repository_id = ?", repoID)
if !opts.OlderThan.IsZero() {
sess.And("`lfs_meta_object`.created_unix < ?", opts.OlderThan)
}
sess.GroupBy("`lfs_meta_object`.id")
if err := sess.Limit(batchSize, start).Find(&beans); err != nil {
return err
}
if len(beans) == 0 {
return nil
}
start += len(beans)

for _, bean := range beans {
if err := f(ctx, &bean.LFSMetaObject, bean.Count); err != nil {
return err
}
}
}
}
37 changes: 37 additions & 0 deletions modules/doctor/lfs.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
// Copyright 2022 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT

package doctor

import (
"context"
"fmt"

"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/setting"
"code.gitea.io/gitea/services/repository"
)

func init() {
Register(&Check{
Title: "Garbage collect LFS",
Name: "gc-lfs",
IsDefault: false,
Run: garbageCollectLFSCheck,
AbortIfFailed: false,
SkipDatabaseInitialization: false,
Priority: 1,
})
}

func garbageCollectLFSCheck(ctx context.Context, logger log.Logger, autofix bool) error {
if !setting.LFS.StartServer {
return fmt.Errorf("LFS support is disabled")
}

if err := repository.GarbageCollectLFSMetaObjects(ctx, logger, autofix); err != nil {
return err
}

return checkStorage(&checkStorageOptions{LFS: true})(ctx, logger, autofix)
}
2 changes: 1 addition & 1 deletion services/cron/tasks_basic.go
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ func registerRepoHealthCheck() {
for _, arg := range rhcConfig.Args {
args = append(args, git.CmdArg(arg))
}
return repo_service.GitFsck(ctx, rhcConfig.Timeout, args)
return repo_service.GitFsckRepos(ctx, rhcConfig.Timeout, args)
})
}

Expand Down
86 changes: 48 additions & 38 deletions services/repository/check.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,8 @@ import (
"xorm.io/builder"
)

// GitFsck calls 'git fsck' to check repository health.
func GitFsck(ctx context.Context, timeout time.Duration, args []git.CmdArg) error {
// GitFsckRepos calls 'git fsck' to check repository health.
func GitFsckRepos(ctx context.Context, timeout time.Duration, args []git.CmdArg) error {
log.Trace("Doing: GitFsck")

if err := db.Iterate(
Expand All @@ -35,15 +35,7 @@ func GitFsck(ctx context.Context, timeout time.Duration, args []git.CmdArg) erro
return db.ErrCancelledf("before fsck of %s", repo.FullName())
default:
}
log.Trace("Running health check on repository %v", repo)
repoPath := repo.RepoPath()
if err := git.Fsck(ctx, repoPath, timeout, args...); err != nil {
log.Warn("Failed to health check repository (%v): %v", repo, err)
if err = system_model.CreateRepositoryNotice("Failed to health check repository (%s): %v", repo.FullName(), err); err != nil {
log.Error("CreateRepositoryNotice: %v", err)
}
}
return nil
return GitFsckRepo(ctx, repo, timeout, args)
},
); err != nil {
log.Trace("Error: GitFsck: %v", err)
Expand All @@ -54,6 +46,19 @@ func GitFsck(ctx context.Context, timeout time.Duration, args []git.CmdArg) erro
return nil
}

// GitFsckRepo calls 'git fsck' to check an individual repository's health.
func GitFsckRepo(ctx context.Context, repo *repo_model.Repository, timeout time.Duration, args []git.CmdArg) error {
log.Trace("Running health check on repository %-v", repo)
repoPath := repo.RepoPath()
if err := git.Fsck(ctx, repoPath, timeout, args...); err != nil {
log.Warn("Failed to health check repository (%-v): %v", repo, err)
if err = system_model.CreateRepositoryNotice("Failed to health check repository (%s): %v", repo.FullName(), err); err != nil {
log.Error("CreateRepositoryNotice: %v", err)
}
}
return nil
}

// GitGcRepos calls 'git gc' to remove unnecessary files and optimize the local repository
func GitGcRepos(ctx context.Context, timeout time.Duration, args ...git.CmdArg) error {
log.Trace("Doing: GitGcRepos")
Expand All @@ -68,33 +73,7 @@ func GitGcRepos(ctx context.Context, timeout time.Duration, args ...git.CmdArg)
return db.ErrCancelledf("before GC of %s", repo.FullName())
default:
}
log.Trace("Running git gc on %v", repo)
command := git.NewCommand(ctx, args...).
SetDescription(fmt.Sprintf("Repository Garbage Collection: %s", repo.FullName()))
var stdout string
var err error
stdout, _, err = command.RunStdString(&git.RunOpts{Timeout: timeout, Dir: repo.RepoPath()})

if err != nil {
log.Error("Repository garbage collection failed for %v. Stdout: %s\nError: %v", repo, stdout, err)
desc := fmt.Sprintf("Repository garbage collection failed for %s. Stdout: %s\nError: %v", repo.RepoPath(), stdout, err)
if err = system_model.CreateRepositoryNotice(desc); err != nil {
log.Error("CreateRepositoryNotice: %v", err)
}
return fmt.Errorf("Repository garbage collection failed in repo: %s: Error: %w", repo.FullName(), err)
}

// Now update the size of the repository
if err := repo_module.UpdateRepoSize(ctx, repo); err != nil {
log.Error("Updating size as part of garbage collection failed for %v. Stdout: %s\nError: %v", repo, stdout, err)
desc := fmt.Sprintf("Updating size as part of garbage collection failed for %s. Stdout: %s\nError: %v", repo.RepoPath(), stdout, err)
if err = system_model.CreateRepositoryNotice(desc); err != nil {
log.Error("CreateRepositoryNotice: %v", err)
}
return fmt.Errorf("Updating size as part of garbage collection failed in repo: %s: Error: %w", repo.FullName(), err)
}

return nil
return GitGcRepo(ctx, repo, timeout, args)
},
); err != nil {
return err
Expand All @@ -104,6 +83,37 @@ func GitGcRepos(ctx context.Context, timeout time.Duration, args ...git.CmdArg)
return nil
}

// GitGcRepo calls 'git gc' to remove unnecessary files and optimize the local repository
func GitGcRepo(ctx context.Context, repo *repo_model.Repository, timeout time.Duration, args []git.CmdArg) error {
log.Trace("Running git gc on %-v", repo)
command := git.NewCommand(ctx, args...).
SetDescription(fmt.Sprintf("Repository Garbage Collection: %s", repo.FullName()))
var stdout string
var err error
stdout, _, err = command.RunStdString(&git.RunOpts{Timeout: timeout, Dir: repo.RepoPath()})

if err != nil {
log.Error("Repository garbage collection failed for %v. Stdout: %s\nError: %v", repo, stdout, err)
desc := fmt.Sprintf("Repository garbage collection failed for %s. Stdout: %s\nError: %v", repo.RepoPath(), stdout, err)
if err = system_model.CreateRepositoryNotice(desc); err != nil {
log.Error("CreateRepositoryNotice: %v", err)
}
return fmt.Errorf("Repository garbage collection failed in repo: %s: Error: %w", repo.FullName(), err)
}

// Now update the size of the repository
if err := repo_module.UpdateRepoSize(ctx, repo); err != nil {
log.Error("Updating size as part of garbage collection failed for %-v. Stdout: %s\nError: %v", repo, stdout, err)
desc := fmt.Sprintf("Updating size as part of garbage collection failed for %s. Stdout: %s\nError: %v", repo.RepoPath(), stdout, err)
if err = system_model.CreateRepositoryNotice(desc); err != nil {
log.Error("CreateRepositoryNotice: %v", err)
}
return fmt.Errorf("Updating size as part of garbage collection failed in repo: %s: Error: %w", repo.FullName(), err)
}

return nil
}

func gatherMissingRepoRecords(ctx context.Context) ([]*repo_model.Repository, error) {
repos := make([]*repo_model.Repository, 0, 10)
if err := db.Iterate(
Expand Down
105 changes: 105 additions & 0 deletions services/repository/lfs.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
// Copyright 2022 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT

package repository

import (
"context"
"fmt"
"time"

"code.gitea.io/gitea/models/db"
git_model "code.gitea.io/gitea/models/git"
repo_model "code.gitea.io/gitea/models/repo"
"code.gitea.io/gitea/modules/git"
"code.gitea.io/gitea/modules/lfs"
"code.gitea.io/gitea/modules/log"

"xorm.io/builder"
)

func GarbageCollectLFSMetaObjects(ctx context.Context, logger log.Logger, autofix bool) error {
log.Trace("Doing: GarbageCollectLFSMetaObjects")

if err := db.Iterate(
ctx,
builder.And(builder.Gt{"id": 0}),
func(ctx context.Context, repo *repo_model.Repository) error {
return GarbageCollectLFSMetaObjectsForRepo(ctx, repo, logger, autofix)
},
); err != nil {
return err
}

log.Trace("Finished: GarbageCollectLFSMetaObjects")
return nil
}

func GarbageCollectLFSMetaObjectsForRepo(ctx context.Context, repo *repo_model.Repository, logger log.Logger, autofix bool) error {
if logger != nil {
logger.Info("Checking %-v", repo)
}
total, orphaned, collected, deleted := 0, 0, 0, 0
if logger != nil {
defer func() {
if orphaned == 0 {
logger.Info("Found %d total LFSMetaObjects in %-v", total, repo)
} else if !autofix {
logger.Info("Found %d/%d orphaned LFSMetaObjects in %-v", orphaned, total, repo)
} else {
logger.Info("Collected %d/%d orphaned/%d total LFSMetaObjects in %-v. %d removed from storage.", collected, orphaned, total, repo, deleted)
}
}()
}

gitRepo, err := git.OpenRepository(ctx, repo.RepoPath())
if err != nil {
log.Error("Unable to open git repository %-v: %v", repo, err)
return err
}
defer gitRepo.Close()

store := lfs.NewContentStore()

return git_model.IterateLFSMetaObjectsForRepo(ctx, repo.ID, func(ctx context.Context, metaObject *git_model.LFSMetaObject, count int64) error {
total++
pointerSha := git.ComputeBlobHash([]byte(metaObject.Pointer.StringContent()))

if gitRepo.IsObjectExist(pointerSha.String()) {
return nil
}
orphaned++

if !autofix {
return nil
}
// Non-existent pointer file
_, err = git_model.RemoveLFSMetaObjectByOidFn(repo.ID, metaObject.Oid, func(count int64) error {
if count > 0 {
return nil
}

if err := store.Delete(metaObject.RelativePath()); err != nil {
log.Error("Unable to remove lfs metaobject %s from store: %v", metaObject.Oid, err)
}
deleted++
return nil
})
if err != nil {
return fmt.Errorf("unable to remove meta-object %s in %s: %w", metaObject.Oid, repo.FullName(), err)
}
collected++

return nil
}, &git_model.IterateLFSMetaObjectsForRepoOptions{
// Only attempt to garbage collect lfs meta objects older than a week as the order of git lfs upload
// and git object upload is not necessarily guaranteed. It's possible to imagine a situation whereby
// an LFS object is uploaded but the git branch is not uploaded immediately, or there are some rapid
// changes in new branches that might lead to lfs objects becoming temporarily unassociated with git
// objects.
//
// It is likely that a week is potentially excessive but it should definitely be enough that any
// unassociated LFS object is genuinely unassociated.
OlderThan: time.Now().Add(-24 * 7 * time.Hour),
})
}

0 comments on commit 8dc1017

Please sign in to comment.