Skip to content

Commit 55bbafd

Browse files
committed
Replace LCS in diff3 with a faster algorithm
The LCS implementation from Tony Garnock-Jones's Synchrotron library produces an optimal solution, but its O(mn log m) runtime and O(mn) space complexity (Hunt-McIlroy) are outperformed by the algorithm described in "An O(NP) Sequence Comparison Algorithm", as implemented by Tatsuhiko Kubo.
1 parent c470875 commit 55bbafd

3 files changed

Lines changed: 465 additions & 0 deletions

File tree

diff3.js

Lines changed: 265 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,265 @@
1+
// Copyright (c) 2006, 2008 Tony Garnock-Jones <tonyg@lshift.net>
2+
// Copyright (c) 2006, 2008 LShift Ltd. <query@lshift.net>
3+
//
4+
// Permission is hereby granted, free of charge, to any person
5+
// obtaining a copy of this software and associated documentation files
6+
// (the "Software"), to deal in the Software without restriction,
7+
// including without limitation the rights to use, copy, modify, merge,
8+
// publish, distribute, sublicense, and/or sell copies of the Software,
9+
// and to permit persons to whom the Software is furnished to do so,
10+
// subject to the following conditions:
11+
//
12+
// The above copyright notice and this permission notice shall be
13+
// included in all copies or substantial portions of the Software.
14+
//
15+
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16+
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17+
// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
18+
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
19+
// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
20+
// ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
21+
// CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22+
// SOFTWARE.
23+
24+
var onp = require('./onp');
25+
26+
/**
 * Converts the shortest edit script produced by the `onp` differ into a
 * linked chain describing the common subsequence of the two inputs.
 *
 * The chain is ordered from the LAST common element towards the first.
 * Every node has the shape `{ file1index, file2index, chain }`, where the
 * two indices locate one common element in each input and `chain` points at
 * the previous common element. The chain always ends with the sentinel
 * `{ file1index: -1, file2index: -1, chain: null }`.
 *
 * When the inputs share no element at all (including when either input is
 * empty) the sentinel itself is returned, so callers can always walk the
 * chain until they reach `null`.
 *
 * @param {Array} file1 First sequence; only `length` is read here, the
 *     elements themselves are compared by `onp`.
 * @param {Array} file2 Second sequence.
 * @returns {{file1index: number, file2index: number, chain: ?Object}}
 *     Head of the chain (never null or undefined).
 */
function longestCommonSubsequence(file1, file2) {
    var diff = new onp(file1, file2);
    diff.compose();
    var ses = diff.getses();

    var sentinel = {
        file1index: -1,
        file2index: -1,
        chain: null
    };
    var root = null;
    var tail = null;

    // The edit script is walked backwards, so both cursors start on the
    // last element of their input and only ever move towards index 0.
    var file1RevIdx = file1.length - 1;
    var file2RevIdx = file2.length - 1;

    for (var i = ses.length - 1; i >= 0; --i) {
        var kind = ses[i].t;
        if (kind === diff.SES_COMMON) {
            var node = {
                file1index: file1RevIdx,
                file2index: file2RevIdx,
                chain: null
            };
            if (tail) {
                tail.chain = node;
            } else {
                root = node;
            }
            tail = node;
            file1RevIdx--;
            file2RevIdx--;
        } else if (kind === diff.SES_DELETE) {
            // Element exists only in file1.
            file1RevIdx--;
        } else if (kind === diff.SES_ADD) {
            // Element exists only in file2.
            file2RevIdx--;
        }
    }

    // No common element was seen: there is no node to hang the sentinel on,
    // so the sentinel itself is the whole chain.
    if (tail === null) {
        return sentinel;
    }

    tail.chain = sentinel;
    return root;
}
69+
70+
/**
 * Describes where two sequences differ, as a list of mismatched chunks.
 *
 * The common-subsequence chain is walked from the end of both inputs to
 * the start; every gap between two consecutive common elements (in either
 * input) becomes one chunk. The result is consumed by diff3MergeIndices.
 *
 * @param {Array} file1 First sequence.
 * @param {Array} file2 Second sequence.
 * @returns {Array<{file1: number[], file2: number[]}>} Chunks in ascending
 *     offset order; each side is an `[offset, length]` pair.
 */
function diffIndices(file1, file2) {
    var chunks = [];

    // Exclusive upper bounds of the region that has not been examined yet.
    var end1 = file1.length;
    var end2 = file2.length;

    var node = longestCommonSubsequence(file1, file2);
    while (node !== null) {
        // Number of unmatched elements between this common element and
        // the previously visited (later) one.
        var gap1 = end1 - node.file1index - 1;
        var gap2 = end2 - node.file2index - 1;

        end1 = node.file1index;
        end2 = node.file2index;

        if (gap1 || gap2) {
            chunks.push({
                file1: [end1 + 1, gap1],
                file2: [end2 + 1, gap2]
            });
        }

        node = node.chain;
    }

    // Chunks were collected back to front.
    chunks.reverse();
    return chunks;
}
96+
97+
/**
 * Computes the merge plan for a three-way merge of A and B, both derived
 * from the common ancestor O.
 *
 * Background reading:
 *
 *   Sanjeev Khanna, Keshav Kunal, and Benjamin C. Pierce. "A Formal
 *   Investigation of Diff3." In Arvind and Prasad, editors, Foundations
 *   of Software Technology and Theoretical Computer Science (FSTTCS),
 *   December 2007.
 *   (http://www.cis.upenn.edu/~bcpierce/papers/diff3-short.pdf)
 *
 * @param {Array} a Sequence derived from `o`.
 * @param {Array} o Common ancestor sequence.
 * @param {Array} b Sequence derived from `o`.
 * @returns {Array<number[]>} Records in ancestor order, each one of:
 *     - `[side, offset, length]`: copy `length` elements starting at
 *       `offset` from side 0 (a), 1 (o) or 2 (b);
 *     - `[-1, aOffset, aLength, oOffset, oLength, bOffset, bLength]`:
 *       a region that both sides changed.
 */
function diff3MergeIndices(a, o, b) {
    // A hunk is [oOffset, side, oLength, sideOffset, sideLength].
    function toHunks(chunks, side) {
        var converted = [];
        for (var n = 0; n < chunks.length; n++) {
            var chunk = chunks[n];
            converted.push([chunk.file1[0], side, chunk.file1[1], chunk.file2[0], chunk.file2[1]]);
        }
        return converted;
    }

    var hunksFromA = toHunks(diffIndices(o, a), 0);
    var hunksFromB = toHunks(diffIndices(o, b), 2);

    // Side-a hunks are listed before side-b hunks, then everything is
    // ordered by its position in the ancestor.
    var hunks = hunksFromA.concat(hunksFromB);
    hunks.sort(function (left, right) {
        return left[0] - right[0];
    });

    var result = [];

    // Offset in `o` up to which output has already been produced.
    var copiedUpTo = 0;

    // Emits the untouched stretch of `o` that precedes `upTo`, if any.
    function emitUnchanged(upTo) {
        if (upTo > copiedUpTo) {
            result.push([1, copiedUpTo, upTo - copiedUpTo]);
            copiedUpTo = upTo;
        }
    }

    var cursor = 0;
    while (cursor < hunks.length) {
        var firstInRegion = cursor;
        var leadHunk = hunks[firstInRegion];
        var regionStart = leadHunk[0];
        var regionEnd = regionStart + leadHunk[2];

        // Grow the region while the next hunk starts inside it or
        // directly at its end.
        while (cursor + 1 < hunks.length && hunks[cursor + 1][0] <= regionEnd) {
            var absorbed = hunks[cursor + 1];
            regionEnd = Math.max(regionEnd, absorbed[0] + absorbed[2]);
            cursor++;
        }

        emitUnchanged(regionStart);

        if (cursor === firstInRegion) {
            // Only one side touched this part of `o`, so its version is
            // taken as is. A zero-length replacement is a pure deletion
            // and produces no record.
            if (leadHunk[4] > 0) {
                result.push([leadHunk[1], leadHunk[3], leadHunk[4]]);
            }
        } else {
            // Several hunks overlap. For each side, fold its hunks into a
            // single span, tracked as
            // [sideMin, sideMax, ancestorMin, ancestorMax].
            var spans = {
                0: [a.length, -1, o.length, -1],
                2: [b.length, -1, o.length, -1]
            };
            for (var k = firstInRegion; k <= cursor; k++) {
                var member = hunks[k];
                var span = spans[member[1]];
                span[0] = Math.min(member[3], span[0]);
                span[1] = Math.max(member[3] + member[4], span[1]);
                span[2] = Math.min(member[0], span[2]);
                span[3] = Math.max(member[0] + member[2], span[3]);
            }

            // Each side's span may cover less of `o` than the whole
            // region; shift its bounds by that skew so all three spans
            // describe the same stretch of the ancestor.
            var aStart = spans[0][0] + (regionStart - spans[0][2]);
            var aEnd = spans[0][1] + (regionEnd - spans[0][3]);
            var bStart = spans[2][0] + (regionStart - spans[2][2]);
            var bEnd = spans[2][1] + (regionEnd - spans[2][3]);

            result.push([
                -1,
                aStart, aEnd - aStart,
                regionStart, regionEnd - regionStart,
                bStart, bEnd - bStart
            ]);
        }

        copiedUpTo = regionEnd;
        cursor++;
    }

    // Whatever remains of the ancestor after the last region is unchanged.
    emitUnchanged(o.length);
    return result;
}
200+
201+
/**
 * Three-way merges `a` and `b`, both derived from the common ancestor `o`.
 *
 * Applies the plan produced by diff3MergeIndices to build the merged
 * output. Consecutive mergeable stretches are collected into a single
 * "ok" block; each real conflict is reported as its own block.
 *
 * A region changed by both sides is only reported as a conflict when the
 * two sides actually differ: if both made the same change (same length and
 * strictly equal elements), side `a`'s copy is merged silently.
 *
 * @param {Array} a Sequence derived from `o`; must support `slice` and
 *     index access.
 * @param {Array} o Common ancestor sequence.
 * @param {Array} b Sequence derived from `o`.
 * @returns {Array<Object>} Blocks in output order, each either
 *     `{ ok: elements }` or
 *     `{ conflict: { a, aIndex, o, oIndex, b, bIndex } }`, where `a`/`o`/`b`
 *     are the conflicting slices and the `*Index` values are their start
 *     offsets in the respective input.
 */
function diff3Merge(a, o, b) {
    var result = [];
    var files = [a, o, b];
    var indices = diff3MergeIndices(a, o, b);

    // Elements merged without conflict since the last flush.
    var okLines = [];

    // Closes the pending "ok" block, if it holds anything.
    function flushOk() {
        if (okLines.length) {
            result.push({
                ok: okLines
            });
        }
        okLines = [];
    }

    // Appends every element of `xs` to the pending "ok" block.
    function pushOk(xs) {
        for (var j = 0; j < xs.length; j++) {
            okLines.push(xs[j]);
        }
    }

    // A conflict record is [-1, aOff, aLen, oOff, oLen, bOff, bLen]. It is
    // a true conflict unless both sides hold identical content. Elements
    // are compared strictly so that values differing only by type (for
    // example 1 and '1') are not mistaken for the same change.
    function isTrueConflict(rec) {
        if (rec[2] !== rec[6]) {
            return true;
        }
        var aoff = rec[1];
        var boff = rec[5];
        for (var j = 0; j < rec[2]; j++) {
            if (a[j + aoff] !== b[j + boff]) {
                return true;
            }
        }
        return false;
    }

    for (var i = 0; i < indices.length; i++) {
        var x = indices[i];
        var side = x[0];
        if (side === -1) {
            if (!isTrueConflict(x)) {
                // Both sides agree; either copy will do.
                pushOk(files[0].slice(x[1], x[1] + x[2]));
            } else {
                flushOk();
                result.push({
                    conflict: {
                        a: a.slice(x[1], x[1] + x[2]),
                        aIndex: x[1],
                        o: o.slice(x[3], x[3] + x[4]),
                        oIndex: x[3],
                        b: b.slice(x[5], x[5] + x[6]),
                        bIndex: x[5]
                    }
                });
            }
        } else {
            // Plain copy from side 0 (a), 1 (o) or 2 (b).
            pushOk(files[side].slice(x[1], x[1] + x[2]));
        }
    }

    flushOk();
    return result;
}
264+
265+
// Public API: the module exports only the three-way merge function.
module.exports = diff3Merge;

0 commit comments

Comments
 (0)