Skip to content

Commit

Permalink
Enhanced prune stash scanner to handle index, working copy, untracked
Browse files Browse the repository at this point in the history
  • Loading branch information
sinbad committed Aug 13, 2020
1 parent eaa85a7 commit 4049928
Show file tree
Hide file tree
Showing 2 changed files with 94 additions and 9 deletions.
5 changes: 5 additions & 0 deletions git/git.go
Original file line number Diff line number Diff line change
Expand Up @@ -269,6 +269,11 @@ func Log(args ...string) (*subprocess.BufferedCmd, error) {
return gitNoLFSBuffered(logArgs...)
}

// Show prepends the "show" subcommand to args and hands the resulting
// argument list to gitNoLFSBuffered, returning whatever that call returns.
func Show(args ...string) (*subprocess.BufferedCmd, error) {
	showArgs := make([]string, 0, len(args)+1)
	showArgs = append(showArgs, "show")
	showArgs = append(showArgs, args...)
	return gitNoLFSBuffered(showArgs...)
}

func LsRemote(remote, remoteRef string) (string, error) {
if remote == "" {
return "", errors.New("remote required")
Expand Down
98 changes: 89 additions & 9 deletions lfs/gitscanner_log.go
Original file line number Diff line number Diff line change
Expand Up @@ -65,26 +65,102 @@ func scanUnpushed(cb GitScannerFoundPointer, remote string) error {
}

// scanStashed walks the reflog of refs/stash, collects the commit ids that
// belong to each stash entry (the leaf commit plus its extra merge parents),
// then runs "git show" over all of them and hands cb, LogDiffAdditions and the
// resulting command to parseScannerLogOutput.
//
// It returns the error from starting "git log" / "git show" or from waiting on
// a per-stash "git show". An error from waiting on the initial "git log" is
// deliberately swallowed and reported as nil (see the comment at that check).
//
// NOTE(review): this listing looks like a rendered diff with the +/- markers
// stripped, so it appears to carry lines from both sides of the change (two
// logArgs declarations, the ScanRef call next to the new collection loop,
// cmd.Start()/defer cmd.Wait() alongside the later explicit cmd.Wait()) —
// confirm against the committed file before relying on it.
func scanStashed(cb GitScannerFoundPointer, s *GitScanner) error {
// First get the SHAs of all stashes
logArgs := []string{"-g", "--format=%H", "refs/stash", "--"}

// Stashes are actually 2-3 commits, each containing one of:
// 1. Working copy modified files
// 2. Index changes
// 3. Untracked files, if -u was used
// We need to get the diff of all 3 of these commits to ensure we have all
// of the LFS objects necessary to pop the stash

// First get the list of stashes
// In recent versions of git you can get parents directly from
// this command, avoiding the intermediate "git show"
// However older gits (at least <=2.7) don't report merge parents in the reflog
// So we need to do it in 2 stages
logArgs := []string{"-g", "--format=%h", "refs/stash", "--"}

cmd, err := git.Log(logArgs...)
if err != nil {
return err
}

cmd.Start()
defer cmd.Wait()

scanner := bufio.NewScanner(cmd.Stdout)

// Accumulates every commit id to pass to the final "git show"
var allStashShas []string
for scanner.Scan() {
err = s.ScanRef(strings.TrimSpace(scanner.Text()), cb)
leafSha := strings.TrimSpace(scanner.Text())

allStashShas = append(allStashShas, leafSha)

// For each leaf, use "git show" to expand parents & thus get
// all 2-3 shas involved in the stash
// As mentioned, newer gits could do this in the reflog output but that is
// not guaranteed
showArgs := []string{"--quiet", "--format=%p", leafSha}
showCmd, err := git.Show(showArgs...)
if err != nil {
return err
}

// gets the abbreviated parent hashes as:
// A B [C]
// A = Parent commit of the stash (ignore, not part of the stash)
// B = Index changes for the stash
// C = Untracked files (optional, only present if -u)
// So we keep B and, when present, C; the leaf sha itself was added above
showScanner := bufio.NewScanner(showCmd.Stdout)

for showScanner.Scan() {
line := strings.TrimSpace(showScanner.Text())
refs := strings.Split(line, " ")
for i, ref := range refs {
if i > 0 { // Extra merge parents
allStashShas = append(allStashShas, ref)
}
}
}
err = showCmd.Wait()
if err != nil {
return err
}

}
err = cmd.Wait()
if err != nil {
// Ignore this error, it really only happens when there's no refs/stash
return nil
}

// Now we need to specifically use "git show" to parse results
// We can't use "git log" because weirdly that omits the index changes
// in the diff display, it collapses both into one diff and only shows the
// final change (not a 3-way like show). Only "show" on all the shas
// above displays them separately

// The "leaf" stash actually shows both the index and working copy, like this:

// - oid sha256:8e1c163c2a04e25158962537cbff2540ded60d4612506a27bc04d059c7ae16dd
// - oid sha256:f2f84832183a0fca648c1ef49cfd32632b16b47ef5f17ac07dcfcb0ae00b86e5
// -- size 16
// +++oid sha256:b23f7e7314c5921e3e1cd87456d7867a51ccbe0c2c19ee4df64525c468d775df
// +++size 30

// The second "-" entry has a space prefix which shows this as a 3-way diff
// However since we include all 2-3 commits explicitly in the git show,
// we get this line as a "+" entry in the other commit
// So we only need to care about the "+" entries
// We can use the log parser, which can now handle 3-char +/- prefixes as well

// NOTE(review): showArgs starts out sharing logLfsSearchArgs' backing array;
// if that slice has spare capacity the appends below would write into it —
// confirm against its declaration.
showArgs := logLfsSearchArgs
showArgs = append(showArgs, allStashShas...)
showArgs = append(showArgs, "--")

cmd, err = git.Show(showArgs...)
if err != nil {
return err
}

parseScannerLogOutput(cb, LogDiffAdditions, cmd)
return nil

}
Expand Down Expand Up @@ -181,7 +257,8 @@ func newLogScanner(dir LogDiffDirection, r io.Reader) *logScanner {
commitHeaderRegex: regexp.MustCompile(fmt.Sprintf(`^lfs-commit-sha: (%s)(?: (%s))*`, git.ObjectIDRegex, git.ObjectIDRegex)),
fileHeaderRegex: regexp.MustCompile(`diff --git a\/(.+?)\s+b\/(.+)`),
fileMergeHeaderRegex: regexp.MustCompile(`diff --cc (.+)`),
pointerDataRegex: regexp.MustCompile(`^([\+\- ])(version https://git-lfs|oid sha256|size|ext-).*$`),
// stash diff can have up to 3 +/- characters. The group captures the whole 1-3 char prefix; scan() uses its first character as the change type
pointerDataRegex: regexp.MustCompile(`^([\+\- ]{1,3})(version https://git-lfs|oid sha256|size|ext-).*$`),
}
}

Expand Down Expand Up @@ -273,11 +350,14 @@ func (s *logScanner) scan() (*WrappedPointer, bool) {
// -U3 will ensure we always get all of it, even if only
// the SHA changed (version & size the same)
changeType := match[1][0]
// merge lines can have 2-3 chars so can't just use changeType==' ' for blank
changeIsBlank := len(strings.TrimSpace(match[1])) == 0

// Always include unchanged context lines (normally just the version line)
if LogDiffDirection(changeType) == s.dir || changeType == ' ' {
if LogDiffDirection(changeType) == s.dir || changeIsBlank {
// Must skip diff +/- marker
s.pointerData.WriteString(line[1:])
// can be 1-3 chars (3 for merge)
s.pointerData.WriteString(line[len(match[1]):])
s.pointerData.WriteString("\n") // newline was stripped off by scanner
}
}
Expand Down

0 comments on commit 4049928

Please sign in to comment.