-
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathdiff.go
More file actions
389 lines (356 loc) · 9.91 KB
/
diff.go
File metadata and controls
389 lines (356 loc) · 9.91 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
package mailpatch
import (
"regexp"
"strconv"
"strings"
)
// ChangeType classifies what happened to a file in a diff.
type ChangeType int

const (
	// Modified is an in-place edit (the default).
	Modified ChangeType = iota
	// Added is a new file (old side is /dev/null).
	Added
	// Deleted is a removed file (new side is /dev/null).
	Deleted
	// Renamed is a move, possibly with edits.
	Renamed
	// Copied is a copy, possibly with edits.
	Copied
)

// String returns the lowercase name of the change type. Values outside the
// declared constants are reported as "modified".
func (c ChangeType) String() string {
	labels := [...]string{
		Modified: "modified",
		Added:    "added",
		Deleted:  "deleted",
		Renamed:  "renamed",
		Copied:   "copied",
	}
	if c < 0 || int(c) >= len(labels) {
		return "modified"
	}
	return labels[c]
}
// FileChange is the diff for a single file: its paths, how it changed, any
// mode information, line counts, and the parsed hunks.
type FileChange struct {
// OldPath and NewPath are the path before and after the change. The parser
// leaves a side empty when the diff names /dev/null for it.
OldPath string
NewPath string
// Type classifies the change; the parser starts each file as Modified.
Type ChangeType
// IsBinary is set when a "Binary files " or "GIT binary patch" line is seen
// for the file.
IsBinary bool
// OldMode and NewMode are the unix mode strings when git reports them
// (e.g. "100644"), otherwise empty.
OldMode string
NewMode string
// Additions and Deletions count added and removed lines across all hunks.
Additions int
Deletions int
// Hunks holds the file's "@@" sections in the order they were parsed.
Hunks []Hunk
}
// Path reports where the file lives now: NewPath when it is set, and OldPath
// otherwise (as for a deletion, whose new side is empty).
func (f FileChange) Path() string {
	if f.NewPath == "" {
		return f.OldPath
	}
	return f.NewPath
}
// Hunk is one "@@ ... @@" section of a file diff.
type Hunk struct {
// OldStart and OldLines are the start line and line count given for the "-"
// side of the "@@" header. A count omitted from the header is recorded as 1.
OldStart int
OldLines int
// NewStart and NewLines are the same pair for the "+" side of the header.
NewStart int
NewLines int
// Section is the text after the closing "@@" (often the enclosing
// function), trimmed.
Section string
// Lines are the hunk's body lines in order, each tagged with its kind.
Lines []Line
}
// LineKind tags a diff line as context, addition, or deletion. Its zero value
// is Context.
type LineKind int
const (
// Context is an unchanged line (leading space).
Context LineKind = iota
// Add is an added line (leading '+').
Add
// Delete is a removed line (leading '-').
Delete
)
// Line is one line within a hunk, with its leading +/-/space removed.
type Line struct {
// Kind says whether the line is context, an addition, or a deletion.
Kind LineKind
// Text is the line content without the one-character diff marker.
Text string
}
// DiffStat is the summary count across a set of file changes.
type DiffStat struct {
// FilesChanged is the number of file changes summarized.
FilesChanged int
// Additions and Deletions are the per-file line counts summed over all
// summarized files.
Additions int
Deletions int
}
// hunkRe matches a unified-diff hunk header such as "@@ -1,4 +1,6 @@ func x()".
// Capture groups: 1 = old start, 2 = old line count (optional), 3 = new start,
// 4 = new line count (optional), 5 = whatever follows the closing "@@".
var hunkRe = regexp.MustCompile(`^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@(.*)$`)
// ParseDiff turns a unified diff into one FileChange per file. Both git
// output (`git diff`, `git format-patch`) and a plain "--- / +++ / @@" diff
// without "diff --git" headers are accepted. Lines the parser does not
// recognize are skipped, so a diff surrounded by other text still parses.
// The returned error is always nil.
func ParseDiff(diff string) ([]FileChange, error) {
	var parser diffParser
	remaining := diff
	for {
		nl := strings.IndexByte(remaining, '\n')
		if nl < 0 {
			// Last piece: whatever follows the final newline (possibly "").
			parser.consume(remaining)
			break
		}
		parser.consume(remaining[:nl])
		remaining = remaining[nl+1:]
	}
	parser.flush()
	return parser.files, nil
}
// diffParser holds the running state while walking a diff line by line.
type diffParser struct {
// files collects the completed file changes; flush appends to it.
files []FileChange
// cur is the file currently being built, or nil when none is open.
cur *FileChange
// hunk points at the hunk currently receiving body lines, or nil when no
// hunk is open.
hunk *Hunk
// oldRem/newRem are the old/new lines still expected in the current hunk,
// from its "@@" header. They bound the hunk body so trailing blank lines
// (e.g. the artifact of the diff's final newline) are not swallowed.
oldRem, newRem int
}
// flush moves the file being built (if any) onto the finished list and
// clears the open file and open hunk.
func (p *diffParser) flush() {
	if open := p.cur; open != nil {
		p.files = append(p.files, *open)
	}
	p.cur, p.hunk = nil, nil
}
// newFile finishes whatever file is open and starts a fresh one, typed
// Modified until a header line says otherwise. It returns the new file.
func (p *diffParser) newFile() *FileChange {
	p.flush()
	fresh := &FileChange{Type: Modified}
	p.cur = fresh
	return fresh
}
// ensure hands back the file currently being built, opening a new one first
// when nothing is open.
func (p *diffParser) ensure() *FileChange {
	if open := p.cur; open != nil {
		return open
	}
	return p.newFile()
}
// consume feeds one line to the parser. File headers are tried first, then
// hunk headers; a line claimed by neither is offered to the hunk body.
func (p *diffParser) consume(line string) {
	if p.header(line) || p.hunkStart(line) {
		return
	}
	p.body(line)
}
// header tries each file-level header handler in turn and stops at the first
// one that claims the line. It reports false when none does, leaving the
// line for the hunk and body handlers.
func (p *diffParser) header(line string) bool {
	if p.gitLine(line) {
		return true
	}
	if p.modeLine(line) {
		return true
	}
	if p.renameCopyLine(line) {
		return true
	}
	if p.binaryLine(line) {
		return true
	}
	return p.pathLine(line)
}
// gitLine handles a "diff --git " line: it opens a new file and seeds its
// paths from the header. It reports whether the line was such a header.
func (p *diffParser) gitLine(line string) bool {
	if strings.HasPrefix(line, "diff --git ") {
		file := p.newFile()
		oldPath, newPath := pathsFromGitHeader(line)
		file.OldPath = oldPath
		file.NewPath = newPath
		return true
	}
	return false
}
// modeLine handles git's mode header lines and reports whether the line was
// one of them. "new file mode" and "deleted file mode" also mark the file as
// Added or Deleted; "old mode"/"new mode" only record the mode string.
func (p *diffParser) modeLine(line string) bool {
	const (
		addedPrefix   = "new file mode "
		deletedPrefix = "deleted file mode "
		oldPrefix     = "old mode "
		newPrefix     = "new mode "
	)
	if strings.HasPrefix(line, addedPrefix) {
		file := p.ensure()
		file.Type = Added
		file.NewMode = strings.TrimSpace(line[len(addedPrefix):])
		return true
	}
	if strings.HasPrefix(line, deletedPrefix) {
		file := p.ensure()
		file.Type = Deleted
		file.OldMode = strings.TrimSpace(line[len(deletedPrefix):])
		return true
	}
	if strings.HasPrefix(line, oldPrefix) {
		p.ensure().OldMode = strings.TrimSpace(line[len(oldPrefix):])
		return true
	}
	if strings.HasPrefix(line, newPrefix) {
		p.ensure().NewMode = strings.TrimSpace(line[len(newPrefix):])
		return true
	}
	return false
}
// renameCopyLine handles git's "rename from/to" and "copy from/to" header
// lines. A match sets the file's type to Renamed or Copied and stores the
// unquoted path on the old ("from") or new ("to") side. It reports whether
// the line was one of these headers.
func (p *diffParser) renameCopyLine(line string) bool {
	rules := [...]struct {
		prefix  string
		kind    ChangeType
		oldSide bool
	}{
		{"rename from ", Renamed, true},
		{"rename to ", Renamed, false},
		{"copy from ", Copied, true},
		{"copy to ", Copied, false},
	}
	for _, rule := range rules {
		if !strings.HasPrefix(line, rule.prefix) {
			continue
		}
		file := p.ensure()
		file.Type = rule.kind
		path := unquotePath(line[len(rule.prefix):])
		if rule.oldSide {
			file.OldPath = path
		} else {
			file.NewPath = path
		}
		return true
	}
	return false
}
// binaryLine flags the current file as binary when the line starts with
// "Binary files " or "GIT binary patch", and reports whether it did.
func (p *diffParser) binaryLine(line string) bool {
	marker := strings.HasPrefix(line, "Binary files ") ||
		strings.HasPrefix(line, "GIT binary patch")
	if !marker {
		return false
	}
	p.ensure().IsBinary = true
	return true
}
// pathLine handles the "--- " (old path) and "+++ " (new path) file header
// lines and reports whether the line was consumed as one.
//
// While a hunk is open and its "@@" counts say more lines are expected, a
// line with one of these prefixes is hunk content rather than a header:
// deleting the line "-- note" is rendered as "--- note", and adding "++ x" is
// rendered as "+++ x". Such lines are declined (false) so they are not taken
// for a file boundary and can be handled as body lines instead. The price is
// that in a headerless plain diff whose hunk is shorter than it declares, the
// next file's "---"/"+++" lines are read as content.
func (p *diffParser) pathLine(line string) bool {
	isOld := strings.HasPrefix(line, "--- ")
	if !isOld && !strings.HasPrefix(line, "+++ ") {
		return false
	}
	if p.hunk != nil && (p.oldRem > 0 || p.newRem > 0) {
		return false
	}

	if isOld {
		// A new "---" while the current file already has hunks starts the
		// next file (handles plain diffs with no "diff --git" header).
		if p.cur == nil || len(p.cur.Hunks) > 0 {
			p.newFile()
		}
		p.hunk = nil
		path, devnull := diffPath(line, "--- ")
		p.cur.OldPath = path
		if devnull {
			// Old side is /dev/null: the file did not exist before.
			p.cur.Type = Added
		}
		return true
	}

	f := p.ensure()
	path, devnull := diffPath(line, "+++ ")
	f.NewPath = path
	if devnull {
		// New side is /dev/null: the file no longer exists.
		f.Type = Deleted
	}
	return true
}
// hunkStart handles a line beginning with "@@ ". A header that parses opens
// a new hunk on the current file (starting a file if none is open) and loads
// the expected old/new line counts from it. A line that has the "@@ " prefix
// but does not parse is still reported as consumed, so it is never treated as
// body content.
func (p *diffParser) hunkStart(line string) bool {
	if !strings.HasPrefix(line, "@@ ") {
		return false
	}
	groups := hunkRe.FindStringSubmatch(line)
	if groups != nil {
		file := p.ensure()
		file.Hunks = append(file.Hunks, Hunk{
			OldStart: atoi(groups[1]),
			OldLines: atoiDefault(groups[2], 1), // omitted count means 1
			NewStart: atoi(groups[3]),
			NewLines: atoiDefault(groups[4], 1),
			Section:  strings.TrimSpace(groups[5]),
		})
		opened := &file.Hunks[len(file.Hunks)-1]
		p.hunk = opened
		p.oldRem = opened.OldLines
		p.newRem = opened.NewLines
	}
	return true
}
// body records one line of the open hunk. It does nothing when no hunk or
// file is open. When the hunk's declared counts are already used up, the
// hunk is closed and the line is dropped: whatever follows (a blank line,
// the next commit's text) is not part of the diff.
func (p *diffParser) body(line string) {
	if p.hunk == nil || p.cur == nil {
		return
	}
	if p.oldRem <= 0 && p.newRem <= 0 {
		p.hunk = nil
		return
	}

	// An empty line is a blank context line whose leading space was stripped.
	kind, text := Context, ""
	if line != "" {
		switch line[0] {
		case '+':
			kind = Add
		case '-':
			kind = Delete
		case ' ':
			// context; kind already set
		default:
			// Includes "\ No newline at end of file": not a content line.
			return
		}
		text = line[1:]
	}
	p.hunk.Lines = append(p.hunk.Lines, Line{Kind: kind, Text: text})

	// Context lines use up one line on both sides; adds and deletes only on
	// their own side.
	switch kind {
	case Add:
		p.cur.Additions++
		p.newRem--
	case Delete:
		p.cur.Deletions++
		p.oldRem--
	default:
		p.oldRem--
		p.newRem--
	}
}
// statOf totals the per-file addition and deletion counts and records how
// many files were given.
func statOf(files []FileChange) DiffStat {
	var total DiffStat
	total.FilesChanged = len(files)
	for i := range files {
		total.Additions += files[i].Additions
		total.Deletions += files[i].Deletions
	}
	return total
}
// atoi converts s to an int and returns whatever strconv.Atoi produced,
// discarding its error; input that is not a number yields 0.
func atoi(s string) int {
	value, err := strconv.Atoi(s)
	_ = err // deliberately ignored: callers want a best-effort number
	return value
}

// atoiDefault is atoi with a fallback: an empty s yields def, anything else
// goes through atoi.
func atoiDefault(s string, def int) int {
	if s != "" {
		return atoi(s)
	}
	return def
}
// pathsFromGitHeader extracts the old and new paths from a
// "diff --git a/x b/y" line. The "---"/"+++" lines override these when
// present, so this is a fallback (and the only path source for pure
// rename/mode diffs that carry no hunks).
//
// Forms tried, in order:
//   - a C-quoted old path, split at its closing quote so that spaces inside
//     the quotes do not break the parse;
//   - an unquoted old path followed by a quoted new path (a/x "b/y");
//   - two unquoted paths split at " b/", preferring the split where both
//     sides name the same file (this disambiguates paths that themselves
//     contain " b/"), otherwise the first " b/";
//   - exactly two whitespace-separated fields.
//
// It returns two empty strings when none of these apply.
func pathsFromGitHeader(line string) (oldPath, newPath string) {
	rest := strings.TrimSpace(strings.TrimPrefix(line, "diff --git "))

	if strings.HasPrefix(rest, `"`) {
		if end := quotedPathEnd(rest); end >= 0 {
			tail := strings.TrimSpace(rest[end+1:])
			// A quoted new path must be closed exactly at the end of the line.
			tailOK := tail != "" &&
				(tail[0] != '"' || quotedPathEnd(tail) == len(tail)-1)
			if tailOK {
				return stripABPrefix(rest[:end+1]), stripABPrefix(tail)
			}
		}
	} else {
		if strings.HasSuffix(rest, `"`) {
			if i := strings.Index(rest, ` "`); i > 0 {
				return stripABPrefix(rest[:i]), stripABPrefix(rest[i+1:])
			}
		}
		first := -1
		for from := 0; ; {
			j := strings.Index(rest[from:], " b/")
			if j < 0 {
				break
			}
			i := from + j
			if first < 0 {
				first = i
			}
			// Same name on both sides: this is the real separator.
			if strings.HasPrefix(rest, "a/") && rest[2:i] == rest[i+3:] {
				return stripABPrefix(rest[:i]), stripABPrefix(rest[i+1:])
			}
			from = i + 1
		}
		if first >= 0 {
			return stripABPrefix(rest[:first]), stripABPrefix(rest[first+1:])
		}
	}

	// Last resort: exactly two space-free tokens.
	fields := strings.Fields(rest)
	if len(fields) == 2 {
		return stripABPrefix(fields[0]), stripABPrefix(fields[1])
	}
	return "", ""
}

// quotedPathEnd returns the index of the double quote closing the quoted
// string that starts at s[0], or -1 if it is unterminated. A backslash
// escapes the byte after it, so an escaped quote does not end the string.
func quotedPathEnd(s string) int {
	for i := 1; i < len(s); i++ {
		switch s[i] {
		case '\\':
			i++
		case '"':
			return i
		}
	}
	return -1
}

// stripABPrefix unquotes s and removes one leading "a/" or "b/" if present.
func stripABPrefix(s string) string {
	s = unquotePath(s)
	if strings.HasPrefix(s, "a/") || strings.HasPrefix(s, "b/") {
		return s[2:]
	}
	return s
}

// diffPath parses a path from a "--- "/"+++ " line, stripping the marker, any
// trailing tab-separated timestamp, the a//b/ prefix, and quoting. It reports
// whether the path was /dev/null, in which case the returned path is empty.
func diffPath(line, marker string) (path string, devnull bool) {
	s := strings.TrimPrefix(line, marker)
	// Everything from the first tab on is a timestamp, not part of the path.
	if i := strings.IndexByte(s, '\t'); i >= 0 {
		s = s[:i]
	}
	s = strings.TrimSpace(s)
	if s == "/dev/null" {
		return "", true
	}
	return stripABPrefix(s), false
}

// unquotePath unquotes a git C-style quoted path ("a/\303\251.txt"); for an
// unquoted path it just trims surrounding space. A quoted string that fails
// to unquote is returned trimmed but otherwise unchanged.
func unquotePath(s string) string {
	s = strings.TrimSpace(s)
	if len(s) >= 2 && s[0] == '"' && s[len(s)-1] == '"' {
		if unq, err := strconv.Unquote(s); err == nil {
			return unq
		}
	}
	return s
}