diff --git a/repo_tree.go b/repo_tree.go
index d1a3f466..fe434d41 100644
--- a/repo_tree.go
+++ b/repo_tree.go
@@ -10,28 +10,64 @@ import (
 	"time"
 )
 
-// UnescapeChars reverses escaped characters.
+// UnescapeChars reverses the C-style escaping Git applies to quoted path names.
+// It decodes \a, \b, \f, \n, \r, \t, \v, \" and \\ as well as three-digit
+// octal escapes such as \303\251, which Git uses for non-ASCII bytes when
+// core.quotePath is enabled. Unrecognized or truncated sequences are kept
+// as-is, and the input is returned unchanged when it has no backslash.
 func UnescapeChars(in []byte) []byte {
-	if bytes.ContainsAny(in, "\\\t") {
+	if !bytes.ContainsRune(in, '\\') {
 		return in
 	}
 
-	out := bytes.Replace(in, escapedSlash, regularSlash, -1)
-	out = bytes.Replace(out, escapedTab, regularTab, -1)
+	out := make([]byte, 0, len(in))
+	for i := 0; i < len(in); i++ {
+		c := in[i]
+		if c != '\\' || i+1 >= len(in) {
+			out = append(out, c)
+			continue
+		}
+
+		switch next := in[i+1]; next {
+		case '\\', '"':
+			out = append(out, next)
+		case 'a':
+			out = append(out, '\a')
+		case 'b':
+			out = append(out, '\b')
+		case 'f':
+			out = append(out, '\f')
+		case 'n':
+			out = append(out, '\n')
+		case 'r':
+			out = append(out, '\r')
+		case 't':
+			out = append(out, '\t')
+		case 'v':
+			out = append(out, '\v')
+		default:
+			// Git writes every other escaped byte as exactly three octal digits.
+			if i+3 < len(in) {
+				d1, d2, d3 := in[i+1]-'0', in[i+2]-'0', in[i+3]-'0'
+				if d1 <= 3 && d2 <= 7 && d3 <= 7 {
+					out = append(out, d1<<6|d2<<3|d3)
+					i += 3
+					continue
+				}
+			}
+			// Unknown escape: keep the backslash and let the next byte through.
+			out = append(out, c)
+			continue
+		}
+		i++ // Skip the escape character that was just decoded.
+	}
 	return out
 }
 
-// Predefine []byte variables to avoid runtime allocations.
-var (
-	escapedSlash = []byte(`\\`)
-	regularSlash = []byte(`\`)
-	escapedTab   = []byte(`\t`)
-	regularTab   = []byte("\t")
-)
-
 // parseTree parses tree information from the (uncompressed) raw data of the
-// tree object.
-func parseTree(t *Tree, data []byte) ([]*TreeEntry, error) {
+// tree object. The lineTerminator specifies the character used to separate
+// entries ('\n' for normal output, '\x00' for verbatim output).
+func parseTree(t *Tree, data []byte, lineTerminator byte) ([]*TreeEntry, error) {
 	entries := make([]*TreeEntry, 0, 10)
 	l := len(data)
 	pos := 0
@@ -70,9 +106,7 @@ func parseTree(t *Tree, data []byte) ([]*TreeEntry, error) {
 		entry.id = id
 		pos += step + 1 // Skip half of SHA1.
 
-		step = bytes.IndexByte(data[pos:], '\n')
-
-		// In case entry name is surrounded by double quotes(it happens only in git-shell).
+		step = bytes.IndexByte(data[pos:], lineTerminator)
-		if data[pos] == '"' {
+		if lineTerminator == '\n' && data[pos] == '"' { // Git never quotes names in NUL-terminated (-z) output.
 			entry.name = string(UnescapeChars(data[pos+1 : pos+step-1]))
 		} else {
@@ -89,12 +101,15 @@ func parseTree(t *Tree, data []byte) ([]*TreeEntry, error) {
 //
 // Docs: https://git-scm.com/docs/git-ls-tree
 type LsTreeOptions struct {
+	// Verbatim outputs filenames unquoted using the -z flag. This avoids issues
+	// with special characters in filenames that would otherwise be quoted by Git.
+	Verbatim bool
 	// The timeout duration before giving up for each shell command execution. The
 	// default timeout duration will be used when not supplied.
 	//
 	// Deprecated: Use CommandOptions.Timeout instead.
 	Timeout time.Duration
-	// The additional options to be passed to the underlying git.
+	// The additional options to be passed to the underlying Git.
 	CommandOptions
 }
 
@@ -121,7 +136,11 @@ func (r *Repository) LsTree(treeID string, opts ...LsTreeOptions) (*Tree, error)
 		repo: r,
 	}
 
-	stdout, err := NewCommand("ls-tree").
+	cmd := NewCommand("ls-tree")
+	if opt.Verbatim {
+		cmd.AddArgs("-z")
+	}
+	stdout, err := cmd.
 		AddOptions(opt.CommandOptions).
 		AddArgs(treeID).
RunInDirWithTimeout(opt.Timeout, r.path) @@ -129,7 +148,11 @@ func (r *Repository) LsTree(treeID string, opts ...LsTreeOptions) (*Tree, error) return nil, err } - t.entries, err = parseTree(t, stdout) + lineTerminator := byte('\n') + if opt.Verbatim { + lineTerminator = 0 + } + t.entries, err = parseTree(t, stdout, lineTerminator) if err != nil { return nil, err } diff --git a/repo_tree_test.go b/repo_tree_test.go index bf398046..16beaa4a 100644 --- a/repo_tree_test.go +++ b/repo_tree_test.go @@ -5,23 +5,91 @@ package git import ( + "os" + "path/filepath" + "runtime" "testing" "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" ) -func TestRepository_LsTree(t *testing.T) { - // Make sure it does not blow up - tree, err := testrepo.LsTree("master", LsTreeOptions{}) - if err != nil { - t.Fatal(err) +func TestUnescapeChars(t *testing.T) { + tests := []struct { + name string + in string + want string + }{ + { + name: "no escapes", + in: "normal-filename.txt", + want: "normal-filename.txt", + }, + { + name: "escaped quote", + in: `Test \"Word\".md`, + want: `Test "Word".md`, + }, + { + name: "escaped backslash", + in: `path\\to\\file.txt`, + want: `path\to\file.txt`, + }, + { + name: "escaped tab", + in: `file\twith\ttabs.txt`, + want: "file\twith\ttabs.txt", + }, + { + name: "mixed escapes", + in: `\"quoted\\path\t.md`, + want: "\"quoted\\path\t.md", + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := UnescapeChars([]byte(tt.in)) + assert.Equal(t, tt.want, string(got)) + }) } - assert.NotNil(t, tree) +} - // Tree ID for "gogs/" directory - tree, err = testrepo.LsTree("fcf7087e732bfe3c25328248a9bf8c3ccd85bed4", LsTreeOptions{}) - if err != nil { - t.Fatal(err) +func TestRepository_LsTree(t *testing.T) { + if runtime.GOOS == "windows" { + t.Skip(`Windows does not allow '"' in filenames`) } - assert.NotNil(t, tree) + + path := tempPath() + defer os.RemoveAll(path) + + err := Init(path) + require.NoError(t, err) + + 
specialName := `Test "Wiki" Page.md` + err = os.WriteFile(filepath.Join(path, specialName), []byte("content"), 0o644) + require.NoError(t, err) + + err = Add(path, AddOptions{All: true}) + require.NoError(t, err) + + err = CreateCommit(path, &Signature{Name: "test", Email: "test@test.com"}, "initial commit") + require.NoError(t, err) + + repo, err := Open(path) + require.NoError(t, err) + + commit, err := repo.CatFileCommit("HEAD") + require.NoError(t, err) + + // Without Verbatim, Git quotes and escapes the filename. + entries, err := commit.Entries() + require.NoError(t, err) + require.Len(t, entries, 1) + assert.Equal(t, specialName, entries[0].Name()) + + // With Verbatim, Git outputs the filename as-is. + entries, err = commit.Entries(LsTreeOptions{Verbatim: true}) + require.NoError(t, err) + require.Len(t, entries, 1) + assert.Equal(t, specialName, entries[0].Name()) }