Skip to content

Commit b6276f8

Browse files
committed
performance and bug fixes
1 parent 0005ff6 commit b6276f8

6 files changed

Lines changed: 74 additions & 44 deletions

File tree

Makefile

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -361,6 +361,19 @@ library_less_strict: clean
361361
$(CC) $(CFLAGS) $(LIBRARY) $(OPTIMIZE) $(OS_FLAGS) $(C_SRCS) -o $(BUILD_DIR)/$(LIBNAME)
362362
$(STRIP)
363363

364+
## Build a performance-optimized library with the most expensive security
365+
## features disabled. Intended for benchmarking and performance measurement.
366+
## All other flags inherit from the top-level defaults.
367+
library_benchmark: DISABLE_CANARY = -DDISABLE_CANARY=1
368+
library_benchmark: PRE_POPULATE_PAGES = -DPRE_POPULATE_PAGES=1
369+
library_benchmark: RANDOMIZE_FREELIST = -DRANDOMIZE_FREELIST=0
370+
library_benchmark: MASK_PTRS = -DMASK_PTRS=0
371+
library_benchmark: ABORT_ON_UNOWNED_PTR = -DABORT_ON_UNOWNED_PTR=0
372+
library_benchmark: clean
373+
@echo "make library_benchmark"
374+
$(CC) $(CFLAGS) $(LIBRARY) $(OPTIMIZE) $(OS_FLAGS) $(C_SRCS) -o $(BUILD_DIR)/$(LIBNAME)
375+
$(STRIP)
376+
364377
## Build a debug version of the library
365378
library_debug: clean
366379
@echo "make library debug"

PERFORMANCE.md

Lines changed: 23 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -153,35 +153,37 @@ iso_realloc/iso_free 1834784 tests completed in 0.901481 seconds
153153
The following benchmarks were collected from [mimalloc-bench](https://github.com/daanx/mimalloc-bench) with the default configuration of IsoAlloc. As you can see from the data, IsoAlloc is competitive with other allocators on some benchmarks but clearly falls behind on others. For any benchmark that IsoAlloc scores poorly on, I was able to tweak its build to improve the CPU time and memory consumption. It's worth noting that IsoAlloc was able to stay competitive even while performing many security checks not present in other allocators. Please note these are 'best case' measurements, not averages.
154154

155155
```
156+
make library_benchmark
157+
156158
#------------------------------------------------------------------
157159
# test alloc time rss user sys page-faults page-reclaims
158-
cfrac je 03.07 4560 3.06 0.00 0 455
159-
cfrac mi 02.92 2676 2.92 0.00 0 348
160-
cfrac iso 05.16 30764 5.08 0.08 0 7497
160+
cfrac je 03.07 4552 3.06 0.00 0 454
161+
cfrac mi 02.97 2484 2.96 0.00 0 347
162+
cfrac iso 04.78 30612 4.69 0.09 0 7503
161163
162-
espresso je 02.49 5032 2.48 0.00 0 550
163-
espresso mi 02.47 3004 2.45 0.01 0 3636
164-
espresso iso 03.25 69124 3.16 0.09 0 30105
164+
espresso je 02.51 4872 2.50 0.01 0 540
165+
espresso mi 02.43 3032 2.42 0.01 0 3630
166+
espresso iso 03.16 69608 3.07 0.07 0 30334
165167
166-
barnes je 01.71 59916 1.68 0.02 0 16684
167-
barnes mi 01.64 57864 1.61 0.02 0 16550
168-
barnes iso 01.65 74968 1.61 0.03 0 20851
168+
barnes je 01.71 59900 1.67 0.03 0 16686
169+
barnes mi 01.65 57672 1.62 0.02 0 16550
170+
barnes iso 01.65 74812 1.62 0.03 0 20849
169171
170-
gs je 00.15 37756 0.13 0.01 0 5812
171-
gs mi 00.15 33668 0.14 0.01 0 5110
172-
gs iso 00.23 67960 0.16 0.06 0 18846
172+
gs je 00.17 37748 0.15 0.01 0 5814
173+
gs mi 00.16 33888 0.14 0.01 0 5109
174+
gs iso 00.22 68136 0.15 0.06 0 18916
173175
174-
larsonN je 1.153 269184 98.81 1.00 0 419378
175-
larsonN mi 1.037 301044 99.34 0.41 0 83267
176-
larsonN iso 1304.061 121072 6.10 70.16 0 30031
176+
larsonN je 1.188 261884 98.91 0.92 0 421848
177+
larsonN mi 1.016 299752 99.53 0.38 0 80202
178+
larsonN iso 1328.904 121096 6.15 69.78 0 30219
177179
178-
rocksdb je 02.49 162976 2.09 0.60 0 38215
179-
rocksdb mi 02.22 160392 1.86 0.54 0 37563
180-
rocksdb iso 02.87 197548 2.58 0.59 0 46899
180+
rocksdb je 02.46 162340 2.05 0.63 0 38383
181+
rocksdb mi 02.33 160156 1.92 0.63 0 37585
182+
rocksdb iso 02.96 195948 2.64 0.66 0 46584
181183
182-
redis je 3.319 9484 0.14 0.02 0 1540
183-
redis mi 2.840 7124 0.12 0.02 0 1254
184-
redis iso 7.340 49712 0.34 0.04 0 14959
184+
redis je 3.160 9492 0.13 0.02 0 1528
185+
redis mi 2.780 7084 0.12 0.02 0 1257
186+
redis iso 7.579 50516 0.35 0.05 0 15187
185187
```
186188

187189
IsoAlloc isn't quite ready for performance-sensitive server workloads. However, it's more than fast enough for client-side mobile/desktop applications with risky C/C++ attack surfaces. These environments have threat models similar to the one IsoAlloc was designed for.

include/iso_alloc_internal.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -376,7 +376,7 @@ INTERNAL_HIDDEN INLINE void populate_zone_cache(iso_alloc_zone_t *zone);
376376
INTERNAL_HIDDEN INLINE void flush_chunk_quarantine(void);
377377
INTERNAL_HIDDEN INLINE void clear_zone_cache(void);
378378
INTERNAL_HIDDEN iso_alloc_big_zone_t *iso_find_big_zone(void *p, bool remove);
379-
INTERNAL_HIDDEN iso_alloc_zone_t *is_zone_usable(iso_alloc_zone_t *zone, size_t size);
379+
INTERNAL_HIDDEN FLATTEN iso_alloc_zone_t *is_zone_usable(iso_alloc_zone_t *zone, size_t size);
380380
INTERNAL_HIDDEN iso_alloc_zone_t *find_suitable_zone(size_t size);
381381
INTERNAL_HIDDEN iso_alloc_zone_t *iso_new_zone(size_t size, bool internal);
382382
INTERNAL_HIDDEN iso_alloc_zone_t *_iso_new_zone(size_t size, bool internal, int32_t index);
@@ -385,7 +385,7 @@ INTERNAL_HIDDEN iso_alloc_zone_t *iso_find_zone_range(void *p);
385385
INTERNAL_HIDDEN iso_alloc_zone_t *search_chunk_lookup_table(const void *p);
386386
INTERNAL_HIDDEN bit_slot_t iso_scan_zone_free_slot_slow(iso_alloc_zone_t *zone);
387387
INTERNAL_HIDDEN bit_slot_t iso_scan_zone_free_slot(iso_alloc_zone_t *zone);
388-
INTERNAL_HIDDEN bit_slot_t get_next_free_bit_slot(iso_alloc_zone_t *zone);
388+
INTERNAL_HIDDEN INLINE bit_slot_t get_next_free_bit_slot(iso_alloc_zone_t *zone);
389389
INTERNAL_HIDDEN iso_alloc_root *iso_alloc_new_root(void);
390390
INTERNAL_HIDDEN bool is_pow2(uint64_t sz);
391391
INTERNAL_HIDDEN bool _is_zone_retired(iso_alloc_zone_t *zone);
@@ -416,7 +416,7 @@ INTERNAL_HIDDEN void *_untag_ptr(void *p, iso_alloc_zone_t *zone);
416416
INTERNAL_HIDDEN void _free_big_zone_list(iso_alloc_big_zone_t *head);
417417
INTERNAL_HIDDEN ASSUME_ALIGNED void *_iso_big_alloc(size_t size);
418418
INTERNAL_HIDDEN ASSUME_ALIGNED void *_iso_alloc(iso_alloc_zone_t *zone, size_t size);
419-
INTERNAL_HIDDEN ASSUME_ALIGNED void *_iso_alloc_bitslot_from_zone(bit_slot_t bitslot, iso_alloc_zone_t *zone);
419+
INTERNAL_HIDDEN INLINE ASSUME_ALIGNED void *_iso_alloc_bitslot_from_zone(bit_slot_t bitslot, iso_alloc_zone_t *zone);
420420
INTERNAL_HIDDEN ASSUME_ALIGNED void *_iso_calloc(size_t nmemb, size_t size);
421421
INTERNAL_HIDDEN void *_iso_alloc_ptr_search(void *n, bool poison);
422422
INTERNAL_HIDDEN INLINE uint64_t us_rand_uint64(uint64_t *seed);

src/iso_alloc.c

Lines changed: 16 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -577,7 +577,7 @@ INTERNAL_HIDDEN void fill_free_bit_slots(iso_alloc_zone_t *zone) {
577577
}
578578

579579
bit_slot_t *free_bit_slots = zone->free_bit_slots;
580-
__iso_memset(free_bit_slots, BAD_BIT_SLOT, ZONE_FREE_LIST_SZ);
580+
__iso_memset(free_bit_slots, BAD_BIT_SLOT, sizeof(zone->free_bit_slots));
581581
zone->free_bit_slots_usable = 0;
582582
free_bit_slot_t free_bit_slots_index;
583583

@@ -668,9 +668,9 @@ INTERNAL_HIDDEN INLINE void insert_free_bit_slot(iso_alloc_zone_t *zone, int64_t
668668
zone->is_full = false;
669669
}
670670

671-
INTERNAL_HIDDEN bit_slot_t get_next_free_bit_slot(iso_alloc_zone_t *zone) {
671+
INTERNAL_HIDDEN INLINE bit_slot_t get_next_free_bit_slot(iso_alloc_zone_t *zone) {
672672
if(zone->free_bit_slots_usable >= ZONE_FREE_LIST_SZ ||
673-
zone->free_bit_slots_usable > zone->free_bit_slots_index) {
673+
zone->free_bit_slots_usable >= zone->free_bit_slots_index) {
674674
return BAD_BIT_SLOT;
675675
}
676676

@@ -783,7 +783,7 @@ INTERNAL_HIDDEN bit_slot_t iso_scan_zone_free_slot_slow(iso_alloc_zone_t *zone)
783783
return BAD_BIT_SLOT;
784784
}
785785

786-
INTERNAL_HIDDEN iso_alloc_zone_t *is_zone_usable(iso_alloc_zone_t *zone, size_t size) {
786+
INTERNAL_HIDDEN FLATTEN iso_alloc_zone_t *is_zone_usable(iso_alloc_zone_t *zone, size_t size) {
787787
#if CPU_PIN
788788
if(zone->cpu_core != _iso_getcpu()) {
789789
return false;
@@ -947,7 +947,7 @@ INTERNAL_HIDDEN iso_alloc_zone_t *find_suitable_zone(size_t size) {
947947
return NULL;
948948
}
949949

950-
INTERNAL_HIDDEN ASSUME_ALIGNED void *_iso_alloc_bitslot_from_zone(bit_slot_t bitslot, iso_alloc_zone_t *zone) {
950+
INTERNAL_HIDDEN INLINE ASSUME_ALIGNED void *_iso_alloc_bitslot_from_zone(bit_slot_t bitslot, iso_alloc_zone_t *zone) {
951951
const bitmap_index_t dwords_to_bit_slot = (bitslot >> BITS_PER_QWORD_SHIFT);
952952
const int64_t which_bit = WHICH_BIT(bitslot);
953953

@@ -1019,7 +1019,7 @@ INTERNAL_HIDDEN INLINE void populate_zone_cache(iso_alloc_zone_t *zone) {
10191019
_zone_cache_count++;
10201020
} else {
10211021
/* Evict oldest entry (index 0) via FIFO: shift all entries down by one */
1022-
memmove(&tzc[0], &tzc[1], (ZONE_CACHE_SZ - 1) * sizeof(_tzc));
1022+
__iso_memmove(&tzc[0], &tzc[1], (ZONE_CACHE_SZ - 1) * sizeof(_tzc));
10231023
_zone_cache_count = ZONE_CACHE_SZ - 1;
10241024
tzc[_zone_cache_count].zone = zone;
10251025
tzc[_zone_cache_count].chunk_size = zone->chunk_size;
@@ -1030,32 +1030,30 @@ INTERNAL_HIDDEN INLINE void populate_zone_cache(iso_alloc_zone_t *zone) {
10301030
}
10311031

10321032
INTERNAL_HIDDEN ASSUME_ALIGNED void *_iso_calloc(size_t nmemb, size_t size) {
1033-
unsigned int res;
1033+
size_t res;
10341034

10351035
if(size < SMALLEST_CHUNK_SZ) {
10361036
size = SMALLEST_CHUNK_SZ;
10371037
} else if((size % SZ_ALIGNMENT) != 0) {
10381038
size = ALIGN_SZ_UP(size);
10391039
}
10401040

1041-
size_t sz = nmemb * size;
1042-
1043-
if(sz < size || UNLIKELY(__builtin_mul_overflow(nmemb, size, &res))) {
1044-
LOG("Call to calloc() will overflow nmemb=%d size=%d = %u", nmemb, size, nmemb * size);
1041+
if(UNLIKELY(__builtin_mul_overflow(nmemb, size, &res)) || UNLIKELY(res > BIG_SZ_MAX)) {
1042+
LOG("Call to calloc() will overflow nmemb=%zu size=%zu", nmemb, size);
10451043
return NULL;
10461044
}
10471045

1048-
void *p = _iso_alloc(NULL, sz);
1046+
void *p = _iso_alloc(NULL, res);
10491047

10501048
#if NO_ZERO_ALLOCATIONS
10511049
/* Without this check we would immediately segfault in
10521050
* the call to __iso_memset() to zeroize the chunk */
1053-
if(UNLIKELY(sz == 0)) {
1051+
if(UNLIKELY(res == 0)) {
10541052
return p;
10551053
}
10561054
#endif
10571055

1058-
__iso_memset(p, 0x0, sz);
1056+
__iso_memset(p, 0x0, res);
10591057
return p;
10601058
}
10611059

@@ -1120,8 +1118,8 @@ INTERNAL_HIDDEN ASSUME_ALIGNED void *_iso_alloc(iso_alloc_zone_t *zone, size_t s
11201118
}
11211119

11221120
#if ALLOC_SANITY
1123-
/* We don't sample if we are allocating from a private zone */
1124-
if(zone != NULL && zone->internal == true) {
1121+
/* We only sample if a zone was not directly passed */
1122+
if(zone != NULL) {
11251123
if(size < g_page_size && _sane_sampled < MAX_SANE_SAMPLES) {
11261124
/* If we chose to sample this allocation then
11271125
* _iso_alloc_sample will call UNLOCK_ROOT() */
@@ -1760,7 +1758,8 @@ INTERNAL_HIDDEN iso_alloc_big_zone_t *iso_find_big_zone(void *p, bool remove) {
17601758
LOCK_BIG_ZONE_USED();
17611759

17621760
if(_root->big_zone_used == NULL) {
1763-
LOG_AND_ABORT("There are no big zones allocated");
1761+
UNLOCK_BIG_ZONE_USED();
1762+
return NULL;
17641763
}
17651764

17661765
iso_alloc_big_zone_t *big_zone = _root->big_zone_used;

src/iso_alloc_interfaces.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -95,13 +95,13 @@ EXTERNAL_API FLATTEN NO_DISCARD REALLOC_SIZE ASSUME_ALIGNED void *iso_realloc(vo
9595
}
9696

9797
EXTERNAL_API FLATTEN NO_DISCARD MALLOC_ATTR REALLOC_SIZE ASSUME_ALIGNED void *iso_reallocarray(void *p, size_t nmemb, size_t size) {
98-
unsigned int res;
98+
size_t res;
9999

100-
if(__builtin_mul_overflow(nmemb, size, &res)) {
100+
if(__builtin_mul_overflow(nmemb, size, &res) || res > BIG_SZ_MAX) {
101101
return NULL;
102102
}
103103

104-
return iso_realloc(p, nmemb * size);
104+
return iso_realloc(p, res);
105105
}
106106

107107
EXTERNAL_API FLATTEN NO_DISCARD ASSUME_ALIGNED char *iso_strdup(const char *str) {

src/iso_alloc_util.c

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -153,6 +153,22 @@ void *mmap_pages(size_t size, bool populate, const char *name, int32_t prot) {
153153

154154
p = mmap(p, sz, prot, flags, fd, 0);
155155

156+
#if __linux__ && MAP_HUGETLB && HUGE_PAGES
157+
/* If the huge page allocation failed, retry with regular pages.
158+
* This can happen when /proc/sys/vm/nr_hugepages is 0 or
159+
* exhausted, which is common in LD_PRELOAD environments. */
160+
if(p == MAP_FAILED && (flags & MAP_HUGETLB)) {
161+
flags &= ~MAP_HUGETLB;
162+
p = mmap(p, sz, prot, flags, fd, 0);
163+
}
164+
#elif __APPLE__ && VM_FLAGS_SUPERPAGE_SIZE_2MB && HUGE_PAGES
165+
/* Same fallback for macOS superpage allocations */
166+
if(p == MAP_FAILED && fd == VM_FLAGS_SUPERPAGE_SIZE_2MB) {
167+
fd = -1;
168+
p = mmap(p, sz, prot, flags, fd, 0);
169+
}
170+
#endif
171+
156172
if(p == MAP_FAILED) {
157173
LOG_AND_ABORT("Failed to mmap rw pages");
158174
}

0 commit comments

Comments
 (0)