forgejo/services/repository/files/upload.go
wxiaoguang 6bc3079c00
Refactor git command package to improve security and maintainability ()
This PR follows  (and replace )

## Review without space diff

https://github.com/go-gitea/gitea/pull/22678/files?diff=split&w=1

## Purpose of this PR

1. Make git module command completely safe (risky user inputs won't be
passed as argument option anymore)
2. Avoid low-level mistakes like
https://github.com/go-gitea/gitea/pull/22098#discussion_r1045234918
3. Remove deprecated and dirty `CmdArgCheck` function, hide the `CmdArg`
type
4. Simplify code when using git command

## The main idea of this PR

* Move the `git.CmdArg` to the `internal` package, then no other package
except `git` could use it. Then developers could never do
`AddArguments(git.CmdArg(userInput))` any more.
* Introduce `git.ToTrustedCmdArgs`, it's for user-provided and already
trusted arguments. It's only used in a few cases, for example: use git
arguments from config file, help unit test with some arguments.
* Introduce `AddOptionValues` and `AddOptionFormat`, they make code more
clear and simple:
    * Before: `AddArguments("-m").AddDynamicArguments(message)`
    * After: `AddOptionValues("-m", message)`
    * -
* Before: `AddArguments(git.CmdArg(fmt.Sprintf("--author='%s <%s>'",
sig.Name, sig.Email)))`
* After: `AddOptionFormat("--author='%s <%s>'", sig.Name, sig.Email)`

## FAQ

### Why these changes were not done in  ?

 is mainly a search&replace, it did its best to not change too
much logic.

Making the framework better needs a lot of changes, so this separate PR
is needed as the second step.


### The naming of `AddOptionXxx`

According to git's manual, the `--xxx` part is called `option`.

### How can it guarantee that `internal.CmdArg` won't be not misused?

Go's specification guarantees that. Trying to access other package's
internal package causes compilation error.

And, `golangci-lint` also denies the git/internal package. Only the
`git/command.go` can use it carefully.

### There is still a `ToTrustedCmdArgs`, will it still allow developers
to make mistakes and pass untrusted arguments?

Generally speaking, no. Because when using `ToTrustedCmdArgs`, the code
will be very complex (see the changes for examples). Then developers and
reviewers can know that something might be unreasonable.

### Why there was a `CmdArgCheck` and why it's removed?

At the moment of , to reduce unnecessary changes, `CmdArgCheck`
was introduced as a hacky patch. Now, almost all code could be written
as `cmd := NewCommand(); cmd.AddXxx(...)`, then there is no need for
`CmdArgCheck` anymore.


### Why many codes for `signArg == ""` is deleted?

Because in the old code, `signArg` could never be empty string, it's
either `-S[key-id]` or `--no-gpg-sign`. So the `signArg == ""` is just
dead code.

---------

Co-authored-by: Lunny Xiao <xiaolunwen@gmail.com>
2023-02-04 10:30:43 +08:00

215 lines
5.9 KiB
Go

// Copyright 2019 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT
package files
import (
"context"
"fmt"
"os"
"path"
"strings"
"code.gitea.io/gitea/models/db"
git_model "code.gitea.io/gitea/models/git"
repo_model "code.gitea.io/gitea/models/repo"
user_model "code.gitea.io/gitea/models/user"
"code.gitea.io/gitea/modules/git"
"code.gitea.io/gitea/modules/lfs"
"code.gitea.io/gitea/modules/setting"
)
// UploadRepoFileOptions contains the uploaded repository file options
type UploadRepoFileOptions struct {
LastCommitID string
OldBranch string
NewBranch string
TreePath string
Message string
Files []string // In UUID format.
Signoff bool
}
type uploadInfo struct {
upload *repo_model.Upload
lfsMetaObject *git_model.LFSMetaObject
}
func cleanUpAfterFailure(infos *[]uploadInfo, t *TemporaryUploadRepository, original error) error {
for _, info := range *infos {
if info.lfsMetaObject == nil {
continue
}
if !info.lfsMetaObject.Existing {
if _, err := git_model.RemoveLFSMetaObjectByOid(db.DefaultContext, t.repo.ID, info.lfsMetaObject.Oid); err != nil {
original = fmt.Errorf("%w, %v", original, err) // We wrap the original error - as this is the underlying error that required the fallback
}
}
}
return original
}
// UploadRepoFiles uploads files to the given repository
func UploadRepoFiles(ctx context.Context, repo *repo_model.Repository, doer *user_model.User, opts *UploadRepoFileOptions) error {
if len(opts.Files) == 0 {
return nil
}
uploads, err := repo_model.GetUploadsByUUIDs(opts.Files)
if err != nil {
return fmt.Errorf("GetUploadsByUUIDs [uuids: %v]: %w", opts.Files, err)
}
names := make([]string, len(uploads))
infos := make([]uploadInfo, len(uploads))
for i, upload := range uploads {
// Check file is not lfs locked, will return nil if lock setting not enabled
filepath := path.Join(opts.TreePath, upload.Name)
lfsLock, err := git_model.GetTreePathLock(ctx, repo.ID, filepath)
if err != nil {
return err
}
if lfsLock != nil && lfsLock.OwnerID != doer.ID {
u, err := user_model.GetUserByID(ctx, lfsLock.OwnerID)
if err != nil {
return err
}
return git_model.ErrLFSFileLocked{RepoID: repo.ID, Path: filepath, UserName: u.Name}
}
names[i] = upload.Name
infos[i] = uploadInfo{upload: upload}
}
t, err := NewTemporaryUploadRepository(ctx, repo)
if err != nil {
return err
}
defer t.Close()
if err := t.Clone(opts.OldBranch); err != nil {
return err
}
if err := t.SetDefaultIndex(); err != nil {
return err
}
var filename2attribute2info map[string]map[string]string
if setting.LFS.StartServer {
filename2attribute2info, err = t.gitRepo.CheckAttribute(git.CheckAttributeOpts{
Attributes: []string{"filter"},
Filenames: names,
CachedOnly: true,
})
if err != nil {
return err
}
}
// Copy uploaded files into repository.
for i := range infos {
if err := copyUploadedLFSFileIntoRepository(&infos[i], filename2attribute2info, t, opts.TreePath); err != nil {
return err
}
}
// Now write the tree
treeHash, err := t.WriteTree()
if err != nil {
return err
}
// make author and committer the doer
author := doer
committer := doer
// Now commit the tree
commitHash, err := t.CommitTree(opts.LastCommitID, author, committer, treeHash, opts.Message, opts.Signoff)
if err != nil {
return err
}
// Now deal with LFS objects
for i := range infos {
if infos[i].lfsMetaObject == nil {
continue
}
infos[i].lfsMetaObject, err = git_model.NewLFSMetaObject(ctx, infos[i].lfsMetaObject)
if err != nil {
// OK Now we need to cleanup
return cleanUpAfterFailure(&infos, t, err)
}
// Don't move the files yet - we need to ensure that
// everything can be inserted first
}
// OK now we can insert the data into the store - there's no way to clean up the store
// once it's in there, it's in there.
contentStore := lfs.NewContentStore()
for _, info := range infos {
if err := uploadToLFSContentStore(info, contentStore); err != nil {
return cleanUpAfterFailure(&infos, t, err)
}
}
// Then push this tree to NewBranch
if err := t.Push(doer, commitHash, opts.NewBranch); err != nil {
return err
}
return repo_model.DeleteUploads(uploads...)
}
func copyUploadedLFSFileIntoRepository(info *uploadInfo, filename2attribute2info map[string]map[string]string, t *TemporaryUploadRepository, treePath string) error {
file, err := os.Open(info.upload.LocalPath())
if err != nil {
return err
}
defer file.Close()
var objectHash string
if setting.LFS.StartServer && filename2attribute2info[info.upload.Name] != nil && filename2attribute2info[info.upload.Name]["filter"] == "lfs" {
// Handle LFS
// FIXME: Inefficient! this should probably happen in models.Upload
pointer, err := lfs.GeneratePointer(file)
if err != nil {
return err
}
info.lfsMetaObject = &git_model.LFSMetaObject{Pointer: pointer, RepositoryID: t.repo.ID}
if objectHash, err = t.HashObject(strings.NewReader(pointer.StringContent())); err != nil {
return err
}
} else if objectHash, err = t.HashObject(file); err != nil {
return err
}
// Add the object to the index
return t.AddObjectToIndex("100644", objectHash, path.Join(treePath, info.upload.Name))
}
func uploadToLFSContentStore(info uploadInfo, contentStore *lfs.ContentStore) error {
if info.lfsMetaObject == nil {
return nil
}
exist, err := contentStore.Exists(info.lfsMetaObject.Pointer)
if err != nil {
return err
}
if !exist {
file, err := os.Open(info.upload.LocalPath())
if err != nil {
return err
}
defer file.Close()
// FIXME: Put regenerates the hash and copies the file over.
// I guess this strictly ensures the soundness of the store but this is inefficient.
if err := contentStore.Put(info.lfsMetaObject.Pointer, file); err != nil {
// OK Now we need to cleanup
// Can't clean up the store, once uploaded there they're there.
return err
}
}
return nil
}