Skip to content

Commit

Permalink
fix: improve performance of SyncBranchesToDB
Browse files Browse the repository at this point in the history
  • Loading branch information
wolfogre committed Mar 1, 2024
1 parent 75950d0 commit 49f224a
Show file tree
Hide file tree
Showing 3 changed files with 114 additions and 50 deletions.
5 changes: 5 additions & 0 deletions models/git/branch.go
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,11 @@ func GetBranch(ctx context.Context, repoID int64, branchName string) (*Branch, e
return &branch, nil
}

func GetBranches(ctx context.Context, repoID int64, branchNames []string) ([]*Branch, error) {
branches := make([]*Branch, 0, len(branchNames))
return branches, db.GetEngine(ctx).Where("repo_id=?", repoID).In("name", branchNames).Find(&branches)
}

func AddBranches(ctx context.Context, branches []*Branch) error {
for _, branch := range branches {
if _, err := db.GetEngine(ctx).Insert(branch); err != nil {
Expand Down
47 changes: 28 additions & 19 deletions routers/private/hook_post_receive.go
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,7 @@ func HookPostReceive(ctx *gitea_context.PrivateContext) {
return
}

var branchesToSync = make([]*repo_module.PushUpdateOptions, 0, len(updates))
for _, update := range updates {
if !update.RefFullName.IsBranch() {
continue
Expand All @@ -116,31 +117,39 @@ func HookPostReceive(ctx *gitea_context.PrivateContext) {
return
}
} else {
if gitRepo == nil {
var err error
gitRepo, err = gitrepo.OpenRepository(ctx, repo)
if err != nil {
log.Error("Failed to open repository: %s/%s Error: %v", ownerName, repoName, err)
ctx.JSON(http.StatusInternalServerError, private.HookPostReceiveResult{
Err: fmt.Sprintf("Failed to open repository: %s/%s Error: %v", ownerName, repoName, err),
})
return
}
}
commit, err := gitRepo.GetCommit(update.NewCommitID)
branchesToSync = append(branchesToSync, update)
}
}
if len(branchesToSync) > 0 {
if gitRepo == nil {
var err error
gitRepo, err = gitrepo.OpenRepository(ctx, repo)
if err != nil {
log.Error("Failed to open repository: %s/%s Error: %v", ownerName, repoName, err)
ctx.JSON(http.StatusInternalServerError, private.HookPostReceiveResult{
Err: fmt.Sprintf("Failed to get commit %s in repository: %s/%s Error: %v", update.NewCommitID, ownerName, repoName, err),
})
return
}
if err := repo_service.SyncBranchToDB(ctx, repo.ID, update.PusherID, update.RefFullName.BranchName(), commit); err != nil {
ctx.JSON(http.StatusInternalServerError, private.HookPostReceiveResult{
Err: fmt.Sprintf("Failed to sync branch to DB in repository: %s/%s Error: %v", ownerName, repoName, err),
Err: fmt.Sprintf("Failed to open repository: %s/%s Error: %v", ownerName, repoName, err),
})
return
}
}

var (
branchNames = make([]string, 0, len(branchesToSync))
commitIDs = make([]string, 0, len(branchesToSync))
)
for _, update := range branchesToSync {
branchNames = append(branchNames, update.RefFullName.BranchName())
commitIDs = append(commitIDs, update.NewCommitID)
}

if err := repo_service.SyncBranchesToDB(ctx, repo.ID, opts.UserID, branchNames, commitIDs, func(commitID string) (*git.Commit, error) {
return gitRepo.GetCommit(commitID)
}); err != nil {
ctx.JSON(http.StatusInternalServerError, private.HookPostReceiveResult{
Err: fmt.Sprintf("Failed to sync branch to DB in repository: %s/%s Error: %v", ownerName, repoName, err),
})
return
}
}
}

Expand Down
112 changes: 81 additions & 31 deletions services/repository/branch.go
Original file line number Diff line number Diff line change
Expand Up @@ -222,44 +222,94 @@ func checkBranchName(ctx context.Context, repo *repo_model.Repository, name stri
return err
}

// SyncBranchToDB sync the branch information in the database. It will try to update the branch first,
// SyncBranchesToDB sync the branch information in the database. It will try to update the branch first,
// if updated success with affect records > 0, then all are done. Because that means the branch has been in the database.
// If no record is affected, that means the branch does not exist in database. So there are two possibilities.
// One is this is a new branch, then we just need to insert the record. Another is the branches haven't been synced,
// then we need to sync all the branches into database.
func SyncBranchToDB(ctx context.Context, repoID, pusherID int64, branchName string, commit *git.Commit) error {
cnt, err := git_model.UpdateBranch(ctx, repoID, pusherID, branchName, commit)
if err != nil {
return fmt.Errorf("git_model.UpdateBranch %d:%s failed: %v", repoID, branchName, err)
}
if cnt > 0 { // This means branch does exist, so it's a normal update. It also means the branch has been synced.
return nil
}
func SyncBranchesToDB(ctx context.Context, repoID, pusherID int64, branchNames []string, commitIDs []string, getCommit func(commitID string) (*git.Commit, error)) error {
// Some designs that make the code look strange but are made for performance optimization purposes:
// 1. Sync branches in a batch to reduce the number of DB queries.
// 2. Lazy load commit information since it may be not necessary.
// 3. Exit early if synced all branches of git repo when there's no branch in DB.
// 4. Check the branches in DB if they are already synced.
//
// If the user pushes many branches at once, the Git hook will call the internal API in batches, rather than all at once.
// See https://github.com/go-gitea/gitea/blob/cb52b17f92e2d2293f7c003649743464492bca48/cmd/hook.go#L27
// For the first batch, it will hit optimization 3.
// For other batches, it will hit optimization 4.

if len(branchNames) != len(commitIDs) {
return fmt.Errorf("branchNames and commitIDs length not match")
}

return db.WithTx(ctx, func(ctx context.Context) error {
branches, err := git_model.GetBranches(ctx, repoID, branchNames)
if err != nil {
return fmt.Errorf("git_model.GetBranches: %v", err)
}
branchMap := make(map[string]*git_model.Branch, len(branches))
for _, branch := range branches {
branchMap[branch.Name] = branch
}

// if user haven't visit UI but directly push to a branch after upgrading from 1.20 -> 1.21,
// we cannot simply insert the branch but need to check we have branches or not
hasBranch, err := db.Exist[git_model.Branch](ctx, git_model.FindBranchOptions{
RepoID: repoID,
IsDeletedBranch: optional.Some(false),
}.ToConds())
if err != nil {
return err
}
if !hasBranch {
if _, err = repo_module.SyncRepoBranches(ctx, repoID, pusherID); err != nil {
return fmt.Errorf("repo_module.SyncRepoBranches %d:%s failed: %v", repoID, branchName, err)
hasBranch := len(branches) > 0

newBranches := make([]*git_model.Branch, 0, len(branchNames))

for i, branchName := range branchNames {
commitID := commitIDs[i]
if branch, ok := branchMap[branchName]; ok && branch.CommitID == commitID {
continue
}

commit, err := getCommit(branchName)
if err != nil {
return fmt.Errorf("get commit of %s failed: %v", branchName, err)
}

cnt, err := git_model.UpdateBranch(ctx, repoID, pusherID, branchName, commit)
if err != nil {
return fmt.Errorf("git_model.UpdateBranch %d:%s failed: %v", repoID, branchName, err)
}
if cnt > 0 { // This means branch does exist, so it's a normal update. It also means the branch has been synced.
hasBranch = true
continue
}

if !hasBranch {
// if user haven't visit UI but directly push to a branch after upgrading from 1.20 -> 1.21,
// we cannot simply insert the branch but need to check we have branches or not
hasBranch, err = db.Exist[git_model.Branch](ctx, git_model.FindBranchOptions{
RepoID: repoID,
IsDeletedBranch: optional.Some(false),
}.ToConds())
if err != nil {
return err
}
if !hasBranch {
if _, err = repo_module.SyncRepoBranches(ctx, repoID, pusherID); err != nil {
return fmt.Errorf("repo_module.SyncRepoBranches %d:%s failed: %v", repoID, branchName, err)
}
return nil
}
}

// if database have branches but not this branch, it means this is a new branch
newBranches = append(newBranches, &git_model.Branch{
RepoID: repoID,
Name: branchName,
CommitID: commit.ID.String(),
CommitMessage: commit.Summary(),
PusherID: pusherID,
CommitTime: timeutil.TimeStamp(commit.Committer.When.Unix()),
})
}
return nil
}

// if database have branches but not this branch, it means this is a new branch
return db.Insert(ctx, &git_model.Branch{
RepoID: repoID,
Name: branchName,
CommitID: commit.ID.String(),
CommitMessage: commit.Summary(),
PusherID: pusherID,
CommitTime: timeutil.TimeStamp(commit.Committer.When.Unix()),
if len(newBranches) > 0 {
return db.Insert(ctx, newBranches)
}
return nil
})
}

Expand Down

0 comments on commit 49f224a

Please sign in to comment.