Skip to content

Commit 89aff19

Browse files
committed
some refactoring +++ fix and test 128 bit subtraction
1 parent 6016ea7 commit 89aff19

3 files changed

Lines changed: 225 additions & 73 deletions

File tree

src/main/java/de/tilman_neumann/jml/base/Uint128.java

Lines changed: 101 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -21,10 +21,10 @@
2121
import de.tilman_neumann.util.Ensure;
2222

2323
/**
24-
* An incomplete 128 bit unsigned int implementation.
24+
* An incomplete 128 bit integer implementation.
2525
*
26-
* Implementation notes:
27-
* * a+Long.MIN_VALUE <> b+Long-MIN_VALUE is an inlined compareUnsigned(a, b) <> 0.
26+
* Implementation note:
27+
* r_lo+Long.MIN_VALUE < low+Long.MIN_VALUE is an inlined compareUnsigned(r_lo, low) < 0.
2828
*
2929
* @author Tilman Neumann
3030
*/
@@ -51,64 +51,88 @@ public long getLow() {
5151
}
5252

5353
/**
54-
* Add two unsigned 128 bit integers.
55-
* @param other
56-
* @return this + other
54+
* Add two 128 bit integers.
55+
* @param b
56+
* @return this + b
5757
*/
58-
public Uint128 add_v1(Uint128 other) {
59-
// We know for sure that low overflows if both low and o_lo are 64 bit. If only one of the input 'low's
60-
// is 64 bit, then we can recognize an overflow if the result.lo is not 64 bit.
61-
final long o_lo = other.getLow();
62-
final long o_hi = other.getHigh();
63-
final long r_lo = low + o_lo;
64-
long r_hi = high + o_hi;
65-
if ((low<0 && o_lo<0) || ((low<0 || o_lo<0) && (r_lo >= 0))) r_hi++;
58+
public Uint128 add_v1(Uint128 b) {
59+
// We know for sure that the addition of the low parts overflows if both low and b_lo have bit 63 set (i.e. are negative as signed longs).
60+
// If only one of them has bit 63 set, then an overflow occurred if and only if bit 63 of r_lo is clear (r_lo >= 0).
61+
final long b_lo = b.getLow();
62+
final long b_hi = b.getHigh();
63+
final long r_lo = low + b_lo;
64+
long r_hi = high + b_hi;
65+
if ((low<0 && b_lo<0) || ((low<0 || b_lo<0) && (r_lo >= 0))) r_hi++;
6666
return new Uint128(r_hi, r_lo);
6767
}
6868

6969
/**
70-
* Add two unsigned 128 bit integers.
70+
* Add two 128 bit integers.
7171
*
72-
* Simpler carry recognition and thus much faster than the first version,
73-
* thanks to Ben, see https://www.mersenneforum.org/showpost.php?p=524300&postcount=173.
72+
* Simpler carry recognition thanks to Ben Buhrow,
73+
* see https://www.mersenneforum.org/showpost.php?p=524300&postcount=173.
7474
*
75-
* @param other
76-
* @return this + other
75+
* @param b
76+
* @return this + b
7777
*/
78-
public Uint128 add/*_v2*/(Uint128 other) {
79-
long a = low + other.getLow();
80-
long b = high + other.getHigh();
81-
if (a+Long.MIN_VALUE < low+Long.MIN_VALUE) b++;
82-
return new Uint128(b, a);
78+
public Uint128 add/*_v2*/(Uint128 b) {
79+
long r_lo = low + b.getLow();
80+
long r_hi = high + b.getHigh();
81+
if (r_lo+Long.MIN_VALUE < low+Long.MIN_VALUE) r_hi++;
82+
return new Uint128(r_hi, r_lo);
8383
}
8484

8585
/**
86-
* Compute the sum of this and other, return the high part.
87-
* @param other
88-
* @return high part of this + other
86+
* Add two 128 bit integers, AI-generated version.
87+
*
88+
* @param b
89+
* @return this + b
8990
*/
90-
public long add_getHigh(Uint128 other) {
91-
long a = low + other.getLow();
92-
long b = high + other.getHigh();
93-
return (a+Long.MIN_VALUE < low+Long.MIN_VALUE) ? b + 1 : b;
91+
public Uint128 add_v3(Uint128 b) {
92+
long r_lo = low + b.getLow();
93+
long carry = Long.compareUnsigned(r_lo, low) < 0 ? 1 : 0;
94+
long r_hi = high + b.getHigh() + carry;
95+
return new Uint128(r_hi, r_lo);
96+
}
97+
98+
/**
99+
* Add two 128 bit integers, return the high part.
100+
* @param b
101+
* @return high part of this + b
102+
*/
103+
public long add_getHigh(Uint128 b) {
104+
long r_lo = low + b.getLow();
105+
long r_hi = high + b.getHigh();
106+
return r_lo+Long.MIN_VALUE < low+Long.MIN_VALUE ? r_hi + 1 : r_hi;
94107
}
95108

96109
/**
97-
* Subtract two unsigned 128 bit integers.
110+
* Subtract two 128 bit integers.
98111
*
99-
* @param other
100-
* @return this - other
112+
* @param b
113+
* @return this - b, may be negative
101114
*/
102-
// XXX experimental, probably wrong...
103-
public Uint128 subtract(Uint128 other) {
104-
long r_lo = low - other.getLow();
105-
long r_hi = high - other.getHigh();
106-
// check for underflow of low 64 bits, subtract carry to high
107-
if (Long.compareUnsigned(r_lo, low) > 0) {
108-
--r_hi;
109-
}
115+
public Uint128 subtract(Uint128 b) {
116+
long b_lo = b.getLow();
117+
long r_lo = low - b_lo;
118+
long r_hi = high - b.getHigh();
119+
if (Long.compareUnsigned(low, b_lo) < 0) --r_hi;
110120
return new Uint128(r_hi, r_lo);
111121
}
122+
123+
/**
124+
* Subtract two 128 bit integers. AI-generated version.
125+
*
126+
* @param b
127+
* @return this - b, may be negative
128+
*/
129+
public Uint128 subtract_v2(Uint128 b) {
130+
long b_lo = b.getLow();
131+
long r_lo = low - b_lo;
132+
long borrow = Long.compareUnsigned(low, b_lo) < 0 ? 1 : 0;
133+
long r_hi = high - b.getHigh() - borrow;
134+
return new Uint128(r_hi, r_lo);
135+
}
112136

113137
/**
114138
* Multiplication of unsigned 63 bit integers,
@@ -547,17 +571,48 @@ public long and(long other) {
547571
public double doubleValue() {
548572
return toBigInteger().doubleValue(); // TODO more efficient solution
549573
}
574+
575+
public double doubleValueUnsigned() {
576+
return toBigIntegerUnsigned().doubleValue(); // TODO more efficient solution
577+
}
550578

551579
/**
552-
* Convert this to BigInteger.
553-
* @return this unsigned 128 bit integer converted to BigInteger
580+
* Signed conversion to BigInteger.
581+
* @return this interpreted as a signed (two's complement) 128 bit integer, i.e. sign bit plus 127 value bits, converted to BigInteger
554582
*/
555583
public BigInteger toBigInteger() {
556-
return new BigInteger(Long.toBinaryString(high), 2).shiftLeft(64).add(new BigInteger(Long.toBinaryString(low), 2));
584+
return BigInteger.valueOf(high).shiftLeft(64).or(toBigIntegerUnsigned(low));
585+
}
586+
587+
/**
588+
* Unsigned conversion to BigInteger.
589+
* @return this as an unsigned 128 bit integer converted to BigInteger
590+
*/
591+
public BigInteger toBigIntegerUnsigned() {
592+
return toBigIntegerUnsigned(high).shiftLeft(64).or(toBigIntegerUnsigned(low));
593+
}
594+
595+
// helper method
596+
private static BigInteger toBigIntegerUnsigned(long n) {
597+
BigInteger big = BigInteger.valueOf(n & Long.MAX_VALUE); // drop sign bit
598+
if (n < 0) {
599+
big = big.setBit(63); // now big is unsigned 64 bit
600+
}
601+
return big;
557602
}
558603

604+
/**
605+
* @return a string representing this as a signed integer
606+
*/
559607
@Override
560608
public String toString() {
561-
return toBigInteger().toString();
609+
return toBigInteger().toString(); // TODO more efficient solution
610+
}
611+
612+
/**
613+
* @return a string representing this as an unsigned integer
614+
*/
615+
public String toStringUnsigned() {
616+
return toBigIntegerUnsigned().toString(); // TODO more efficient solution
562617
}
563618
}

src/test/java/de/tilman_neumann/jml/base/Uint128PerformanceTest.java

Lines changed: 86 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -26,9 +26,10 @@ public class Uint128PerformanceTest {
2626
private static final Random RNG = new Random();
2727

2828
private static void testPerformance() {
29-
// Performance tests are carried out in double loops over the same numbers.
29+
// Performance tests of 2-argument methods are carried out in double loops over the same numbers.
3030
// Otherwise number creation is much more expensive than testing the operations themselves.
3131
int NCOUNT = 300000;
32+
int NCOUNT_MUL = 40000;
3233
int NCOUNT_DIV = 20000;
3334

3435
// set up test numbers
@@ -42,15 +43,31 @@ private static void testPerformance() {
4243
a128_arr[i] = new Uint128(a_arr[i], b_arr[i]);
4344
}
4445

45-
// test performance of add implementations
46+
// test performance of conversion
4647

4748
long t0 = System.currentTimeMillis();
49+
for (int i=0; i<NCOUNT; i++) {
50+
a128_arr[i].toBigInteger();
51+
}
52+
long t1 = System.currentTimeMillis();
53+
LOG.info("toBigInteger took " + (t1-t0) + "ms");
54+
55+
t0 = System.currentTimeMillis();
56+
for (int i=0; i<NCOUNT; i++) {
57+
a128_arr[i].toBigIntegerUnsigned();
58+
}
59+
t1 = System.currentTimeMillis();
60+
LOG.info("toBigIntegerUnsigned took " + (t1-t0) + "ms");
61+
62+
// test performance of add implementations
63+
64+
t0 = System.currentTimeMillis();
4865
for (int i=0; i<NCOUNT; i++) {
4966
for (int j=0; j<NCOUNT; j++) {
5067
a128_arr[i].add_v1(a128_arr[j]);
5168
}
5269
}
53-
long t1 = System.currentTimeMillis();
70+
t1 = System.currentTimeMillis();
5471
LOG.info("add_v1 took " + (t1-t0) + "ms");
5572

5673
t0 = System.currentTimeMillis();
@@ -61,45 +78,97 @@ private static void testPerformance() {
6178
}
6279
t1 = System.currentTimeMillis();
6380
LOG.info("add_v2 took " + (t1-t0) + "ms");
64-
65-
// test performance of mul64 implementations
66-
81+
6782
t0 = System.currentTimeMillis();
6883
for (int i=0; i<NCOUNT; i++) {
6984
for (int j=0; j<NCOUNT; j++) {
70-
Uint128.mul64_v1(a_arr[i], a_arr[j]);
85+
a128_arr[i].add_v3(a128_arr[j]);
7186
}
7287
}
7388
t1 = System.currentTimeMillis();
74-
LOG.info("mul64_v1 took " + (t1-t0) + "ms");
75-
89+
LOG.info("add_v3 took " + (t1-t0) + "ms");
90+
7691
t0 = System.currentTimeMillis();
7792
for (int i=0; i<NCOUNT; i++) {
7893
for (int j=0; j<NCOUNT; j++) {
79-
Uint128.mul64/*_v2*/(a_arr[i], a_arr[j]);
94+
a128_arr[i].subtract(a128_arr[j]);
8095
}
8196
}
8297
t1 = System.currentTimeMillis();
83-
LOG.info("mul64_v2 took " + (t1-t0) + "ms");
84-
98+
LOG.info("subtract took " + (t1-t0) + "ms");
99+
85100
t0 = System.currentTimeMillis();
86101
for (int i=0; i<NCOUNT; i++) {
87102
for (int j=0; j<NCOUNT; j++) {
88-
Uint128.mul64_v3(a_arr[i], a_arr[j]);
103+
a128_arr[i].subtract_v2(a128_arr[j]);
104+
}
105+
}
106+
t1 = System.currentTimeMillis();
107+
LOG.info("subtract_v2 took " + (t1-t0) + "ms");
108+
109+
// Test performance of mul64 implementations:
110+
// Here we need to do something with the results to avoid the compiler optimizing the tests to nothing
111+
112+
long r = 0;
113+
t0 = System.currentTimeMillis();
114+
for (int i=0; i<NCOUNT_MUL; i++) {
115+
for (int j=0; j<NCOUNT_MUL; j++) {
116+
Uint128 result = Uint128.mul63(a_arr[i], a_arr[j]);
117+
r += result.getHigh() + result.getLow();
118+
}
119+
}
120+
t1 = System.currentTimeMillis();
121+
LOG.info("mul63 took " + (t1-t0) + "ms");
122+
LOG.trace("r = " + r);
123+
124+
r = 0;
125+
t0 = System.currentTimeMillis();
126+
for (int i=0; i<NCOUNT_MUL; i++) {
127+
for (int j=0; j<NCOUNT_MUL; j++) {
128+
Uint128 result = Uint128.mul64_v1(a_arr[i], a_arr[j]);
129+
r += result.getHigh() + result.getLow();
130+
}
131+
}
132+
t1 = System.currentTimeMillis();
133+
LOG.info("mul64_v1 took " + (t1-t0) + "ms");
134+
LOG.trace("r = " + r);
135+
136+
r = 0;
137+
t0 = System.currentTimeMillis();
138+
for (int i=0; i<NCOUNT_MUL; i++) {
139+
for (int j=0; j<NCOUNT_MUL; j++) {
140+
Uint128 result = Uint128.mul64/*_v2*/(a_arr[i], a_arr[j]);
141+
r += result.getHigh() + result.getLow();
142+
}
143+
}
144+
t1 = System.currentTimeMillis();
145+
LOG.info("mul64_v2 took " + (t1-t0) + "ms");
146+
LOG.trace("r = " + r);
147+
148+
r = 0;
149+
t0 = System.currentTimeMillis();
150+
for (int i=0; i<NCOUNT_MUL; i++) {
151+
for (int j=0; j<NCOUNT_MUL; j++) {
152+
Uint128 result = Uint128.mul64_v3(a_arr[i], a_arr[j]);
153+
r += result.getHigh() + result.getLow();
89154
}
90155
}
91156
t1 = System.currentTimeMillis();
92157
LOG.info("mul64_v3 took " + (t1-t0) + "ms");
158+
LOG.trace("r = " + r);
93159

160+
r = 0;
94161
t0 = System.currentTimeMillis();
95-
for (int i=0; i<NCOUNT; i++) {
96-
for (int j=0; j<NCOUNT; j++) {
97-
Uint128.mul64_MH(a_arr[i], a_arr[j]);
162+
for (int i=0; i<NCOUNT_MUL; i++) {
163+
for (int j=0; j<NCOUNT_MUL; j++) {
164+
Uint128 result = Uint128.mul64_MH(a_arr[i], a_arr[j]);
165+
r += result.getHigh() + result.getLow();
98166
}
99167
}
100168
t1 = System.currentTimeMillis();
101169
LOG.info("mul64_MH took " + (t1-t0) + "ms");
102-
170+
LOG.trace("r = " + r);
171+
103172
// test performance of 128 / 64 bit division and modulus
104173

105174
t0 = System.currentTimeMillis();

0 commit comments

Comments
 (0)