Skip to content

Commit 8e0c657

Browse files
committed
bench: add float8 operation benchmarks
Add BenchmarkFromFloat32, BenchmarkToFloat32_Modes, BenchmarkAddModes, BenchmarkMulModes, BenchmarkSub, and BenchmarkDiv with sub-benchmarks for algorithmic vs lookup-table paths. Record baseline results in docs/devlog.md.
1 parent ff3090d commit 8e0c657

2 files changed

Lines changed: 184 additions & 0 deletions

File tree

benchmark_test.go

Lines changed: 148 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,148 @@
1+
package float8
2+
3+
import (
4+
"testing"
5+
)
6+
7+
// BenchmarkFromFloat32 benchmarks float32 → Float8 conversion.
8+
func BenchmarkFromFloat32(b *testing.B) {
9+
b.Run("Normal", func(b *testing.B) {
10+
f32 := float32(1.5)
11+
for i := 0; i < b.N; i++ {
12+
_ = ToFloat8(f32)
13+
}
14+
})
15+
b.Run("Subnormal", func(b *testing.B) {
16+
f32 := float32(0.001953125) // smallest normal float8 boundary
17+
for i := 0; i < b.N; i++ {
18+
_ = ToFloat8(f32)
19+
}
20+
})
21+
b.Run("Zero", func(b *testing.B) {
22+
f32 := float32(0.0)
23+
for i := 0; i < b.N; i++ {
24+
_ = ToFloat8(f32)
25+
}
26+
})
27+
b.Run("Large", func(b *testing.B) {
28+
f32 := float32(448.0) // max finite float8
29+
for i := 0; i < b.N; i++ {
30+
_ = ToFloat8(f32)
31+
}
32+
})
33+
}
34+
35+
// BenchmarkToFloat32_Modes benchmarks Float8 → float32 conversion with
36+
// algorithmic and lookup-table paths.
37+
func BenchmarkToFloat32_Modes(b *testing.B) {
38+
f8 := ToFloat8(1.5)
39+
40+
b.Run("Algorithmic", func(b *testing.B) {
41+
DisableFastConversion()
42+
b.ResetTimer()
43+
for i := 0; i < b.N; i++ {
44+
_ = f8.ToFloat32()
45+
}
46+
})
47+
b.Run("Lookup", func(b *testing.B) {
48+
EnableFastConversion()
49+
b.ResetTimer()
50+
for i := 0; i < b.N; i++ {
51+
_ = f8.ToFloat32()
52+
}
53+
b.StopTimer()
54+
DisableFastConversion()
55+
})
56+
}
57+
58+
// BenchmarkAddModes benchmarks addition with algorithmic and lookup-table paths.
59+
func BenchmarkAddModes(b *testing.B) {
60+
a := ToFloat8(1.5)
61+
c := ToFloat8(2.5)
62+
63+
b.Run("Algorithmic", func(b *testing.B) {
64+
DisableFastArithmetic()
65+
b.ResetTimer()
66+
for i := 0; i < b.N; i++ {
67+
_ = Add(a, c)
68+
}
69+
})
70+
b.Run("Lookup", func(b *testing.B) {
71+
EnableFastArithmetic()
72+
b.ResetTimer()
73+
for i := 0; i < b.N; i++ {
74+
_ = Add(a, c)
75+
}
76+
b.StopTimer()
77+
DisableFastArithmetic()
78+
})
79+
}
80+
81+
// BenchmarkMulModes benchmarks multiplication with algorithmic and lookup-table paths.
82+
func BenchmarkMulModes(b *testing.B) {
83+
a := ToFloat8(1.5)
84+
c := ToFloat8(2.5)
85+
86+
b.Run("Algorithmic", func(b *testing.B) {
87+
DisableFastArithmetic()
88+
b.ResetTimer()
89+
for i := 0; i < b.N; i++ {
90+
_ = Mul(a, c)
91+
}
92+
})
93+
b.Run("Lookup", func(b *testing.B) {
94+
EnableFastArithmetic()
95+
b.ResetTimer()
96+
for i := 0; i < b.N; i++ {
97+
_ = Mul(a, c)
98+
}
99+
b.StopTimer()
100+
DisableFastArithmetic()
101+
})
102+
}
103+
104+
// BenchmarkSub benchmarks subtraction.
105+
func BenchmarkSub(b *testing.B) {
106+
a := ToFloat8(3.5)
107+
c := ToFloat8(1.5)
108+
109+
b.Run("Algorithmic", func(b *testing.B) {
110+
DisableFastArithmetic()
111+
b.ResetTimer()
112+
for i := 0; i < b.N; i++ {
113+
_ = Sub(a, c)
114+
}
115+
})
116+
b.Run("Lookup", func(b *testing.B) {
117+
EnableFastArithmetic()
118+
b.ResetTimer()
119+
for i := 0; i < b.N; i++ {
120+
_ = Sub(a, c)
121+
}
122+
b.StopTimer()
123+
DisableFastArithmetic()
124+
})
125+
}
126+
127+
// BenchmarkDiv benchmarks division.
128+
func BenchmarkDiv(b *testing.B) {
129+
a := ToFloat8(3.5)
130+
c := ToFloat8(1.5)
131+
132+
b.Run("Algorithmic", func(b *testing.B) {
133+
DisableFastArithmetic()
134+
b.ResetTimer()
135+
for i := 0; i < b.N; i++ {
136+
_ = Div(a, c)
137+
}
138+
})
139+
b.Run("Lookup", func(b *testing.B) {
140+
EnableFastArithmetic()
141+
b.ResetTimer()
142+
for i := 0; i < b.N; i++ {
143+
_ = Div(a, c)
144+
}
145+
b.StopTimer()
146+
DisableFastArithmetic()
147+
})
148+
}

docs/devlog.md

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
# Float8 Development Log
2+
3+
## 2026-03-29 -- Baseline Benchmarks
4+
5+
Recorded on Apple M4 (darwin/arm64), Go 1.25, `-benchmem -count=3`.
6+
7+
### Conversion
8+
9+
| Benchmark | ns/op | B/op | allocs/op |
10+
|-----------|------:|-----:|----------:|
11+
| FromFloat32/Normal | 2.50 | 0 | 0 |
12+
| FromFloat32/Subnormal | 2.53 | 0 | 0 |
13+
| FromFloat32/Zero | 0.98 | 0 | 0 |
14+
| FromFloat32/Large | 2.54 | 0 | 0 |
15+
| ToFloat32_Modes/Algorithmic | 1.36 | 0 | 0 |
16+
| ToFloat32_Modes/Lookup | 0.38 | 0 | 0 |
17+
18+
### Arithmetic (Algorithmic vs Lookup)
19+
20+
| Benchmark | Algorithmic ns/op | Lookup ns/op | Speedup |
21+
|-----------|------------------:|-------------:|--------:|
22+
| Add | 7.08 | 0.99 | 7.2x |
23+
| Sub | 6.91 | 0.99 | 7.0x |
24+
| Mul | 7.27 | 1.00 | 7.3x |
25+
| Div | 7.85 | 1.00 | 7.9x |
26+
27+
### Batch Operations (1000 elements)
28+
29+
| Benchmark | ns/op | B/op | allocs/op |
30+
|-----------|------:|-----:|----------:|
31+
| ToSlice8 | 3351 | 1024 | 1 |
32+
| ToSlice32 | 1592 | 4096 | 1 |
33+
34+
All operations are zero-allocation for scalar paths. Lookup tables provide
35+
a consistent 7-8x speedup over algorithmic arithmetic at the cost of 256 KiB
36+
of memory (4 tables x 64K entries x 1 byte).

0 commit comments

Comments
 (0)