Skip to content

Commit 156d985

Browse files
Abhishek Dubey authored and maddy-kerneldev committed
powerpc64/bpf: Implement JIT support for private stack
Provision the private stack as a per-CPU allocation during
bpf_int_jit_compile(). Align the stack to 16 bytes and place guard
regions at both ends to detect runtime stack overflow and underflow.
Round the private stack size up to the nearest 16-byte boundary. Make
each guard region 16 bytes to preserve the required overall 16-byte
alignment.

When the private stack is set, skip bpf stack size accounting in the
kernel stack. There is no stack pointer in powerpc; stack referencing
during JIT is done using the frame pointer. The frame pointer is
calculated as:

  BPF frame pointer = Priv stack allocation start address
                      + Overflow guard
                      + Actual stack size defined by verifier

Memory layout:

                      +--------------------------------------------------+  High Addr
                      |                                                  |
                      | 16 bytes Underflow guard (0xEB9F12345678eb9fULL) |
                      |                                                  |
          BPF FP  ->  +--------------------------------------------------+
                      |                                                  |
                      |    Private stack - determined by verifier        |
                      |              16-bytes aligned                    |
                      |                                                  |
                      +--------------------------------------------------+
                      |                                                  |
                      | 16 byte Overflow guard (0xEB9F12345678eb9fULL)   |
                      |                                                  |
Priv stack alloc  ->  +--------------------------------------------------+  Lower Addr
      start

Update BPF_REG_FP to point to the calculated offset within the
allocated private stack buffer. Now, BPF stack usage references the
allocated private stack.

Signed-off-by: Abhishek Dubey <adubey@linux.ibm.com>
Tested-by: Venkat Rao Bagalkote <venkat88@linux.ibm.com>
Acked-by: Hari Bathini <hbathini@linux.ibm.com>
Signed-off-by: Madhavan Srinivasan <maddy@linux.ibm.com>
Link: https://patch.msgid.link/20260401103215.104438-1-adubey@linux.ibm.com
1 parent bd77a34 commit 156d985

3 files changed

Lines changed: 126 additions & 8 deletions

File tree

arch/powerpc/net/bpf_jit.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -178,8 +178,14 @@ struct codegen_context {
178178
bool is_subprog;
179179
bool exception_boundary;
180180
bool exception_cb;
181+
void __percpu *priv_sp;
182+
unsigned int priv_stack_size;
181183
};
182184

185+
/* Memory size & magic-value to detect private stack overflow/underflow */
186+
#define PRIV_STACK_GUARD_SZ 16
187+
#define PRIV_STACK_GUARD_VAL 0xEB9F12345678eb9fULL
188+
183189
#define bpf_to_ppc(r) (ctx->b2p[r])
184190

185191
#ifdef CONFIG_PPC32

arch/powerpc/net/bpf_jit_comp.c

Lines changed: 92 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -129,25 +129,60 @@ bool bpf_jit_needs_zext(void)
129129
return true;
130130
}
131131

132+
static void priv_stack_init_guard(void __percpu *priv_stack_ptr, int alloc_size)
133+
{
134+
int cpu, underflow_idx = (alloc_size - PRIV_STACK_GUARD_SZ) >> 3;
135+
u64 *stack_ptr;
136+
137+
for_each_possible_cpu(cpu) {
138+
stack_ptr = per_cpu_ptr(priv_stack_ptr, cpu);
139+
stack_ptr[0] = PRIV_STACK_GUARD_VAL;
140+
stack_ptr[1] = PRIV_STACK_GUARD_VAL;
141+
stack_ptr[underflow_idx] = PRIV_STACK_GUARD_VAL;
142+
stack_ptr[underflow_idx + 1] = PRIV_STACK_GUARD_VAL;
143+
}
144+
}
145+
146+
static void priv_stack_check_guard(void __percpu *priv_stack_ptr, int alloc_size,
147+
struct bpf_prog *fp)
148+
{
149+
int cpu, underflow_idx = (alloc_size - PRIV_STACK_GUARD_SZ) >> 3;
150+
u64 *stack_ptr;
151+
152+
for_each_possible_cpu(cpu) {
153+
stack_ptr = per_cpu_ptr(priv_stack_ptr, cpu);
154+
if (stack_ptr[0] != PRIV_STACK_GUARD_VAL ||
155+
stack_ptr[1] != PRIV_STACK_GUARD_VAL ||
156+
stack_ptr[underflow_idx] != PRIV_STACK_GUARD_VAL ||
157+
stack_ptr[underflow_idx + 1] != PRIV_STACK_GUARD_VAL) {
158+
pr_err("BPF private stack overflow/underflow detected for prog %s\n",
159+
bpf_jit_get_prog_name(fp));
160+
break;
161+
}
162+
}
163+
}
164+
132165
struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
133166
{
134167
u32 proglen;
135168
u32 alloclen;
136169
u8 *image = NULL;
137-
u32 *code_base;
138-
u32 *addrs;
139-
struct powerpc_jit_data *jit_data;
170+
u32 *code_base = NULL;
171+
u32 *addrs = NULL;
172+
struct powerpc_jit_data *jit_data = NULL;
140173
struct codegen_context cgctx;
141174
int pass;
142175
int flen;
176+
int priv_stack_alloc_size;
177+
void __percpu *priv_stack_ptr = NULL;
143178
struct bpf_binary_header *fhdr = NULL;
144179
struct bpf_binary_header *hdr = NULL;
145180
struct bpf_prog *org_fp = fp;
146-
struct bpf_prog *tmp_fp;
181+
struct bpf_prog *tmp_fp = NULL;
147182
bool bpf_blinded = false;
148183
bool extra_pass = false;
149184
u8 *fimage = NULL;
150-
u32 *fcode_base;
185+
u32 *fcode_base = NULL;
151186
u32 extable_len;
152187
u32 fixup_len;
153188

@@ -173,6 +208,26 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
173208
fp->aux->jit_data = jit_data;
174209
}
175210

211+
priv_stack_ptr = fp->aux->priv_stack_ptr;
212+
if (!priv_stack_ptr && fp->aux->jits_use_priv_stack) {
213+
/*
214+
* Allocate private stack of size equivalent to
215+
* verifier-calculated stack size plus two memory
216+
* guard regions to detect private stack overflow
217+
* and underflow.
218+
*/
219+
priv_stack_alloc_size = round_up(fp->aux->stack_depth, 16) +
220+
2 * PRIV_STACK_GUARD_SZ;
221+
priv_stack_ptr = __alloc_percpu_gfp(priv_stack_alloc_size, 16, GFP_KERNEL);
222+
if (!priv_stack_ptr) {
223+
fp = org_fp;
224+
goto out_priv_stack;
225+
}
226+
227+
priv_stack_init_guard(priv_stack_ptr, priv_stack_alloc_size);
228+
fp->aux->priv_stack_ptr = priv_stack_ptr;
229+
}
230+
176231
flen = fp->len;
177232
addrs = jit_data->addrs;
178233
if (addrs) {
@@ -209,6 +264,19 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
209264
cgctx.is_subprog = bpf_is_subprog(fp);
210265
cgctx.exception_boundary = fp->aux->exception_boundary;
211266
cgctx.exception_cb = fp->aux->exception_cb;
267+
cgctx.priv_sp = priv_stack_ptr;
268+
cgctx.priv_stack_size = 0;
269+
if (priv_stack_ptr) {
270+
/*
271+
* priv_stack_size required for setting bpf FP inside
272+
* percpu allocation.
273+
* stack_size is marked 0 to prevent allocation on
274+
* general stack and offset calculation don't go for
275+
* a toss in bpf_jit_stack_offsetof() & bpf_jit_stack_local()
276+
*/
277+
cgctx.priv_stack_size = cgctx.stack_size;
278+
cgctx.stack_size = 0;
279+
}
212280

213281
/* Scouting faux-generate pass 0 */
214282
if (bpf_jit_build_body(fp, NULL, NULL, &cgctx, addrs, 0, false)) {
@@ -306,6 +374,11 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
306374
}
307375
bpf_prog_fill_jited_linfo(fp, addrs);
308376
out_addrs:
377+
if (!image && priv_stack_ptr) {
378+
fp->aux->priv_stack_ptr = NULL;
379+
free_percpu(priv_stack_ptr);
380+
}
381+
out_priv_stack:
309382
kfree(addrs);
310383
kfree(jit_data);
311384
fp->aux->jit_data = NULL;
@@ -419,6 +492,8 @@ void bpf_jit_free(struct bpf_prog *fp)
419492
if (fp->jited) {
420493
struct powerpc_jit_data *jit_data = fp->aux->jit_data;
421494
struct bpf_binary_header *hdr;
495+
void __percpu *priv_stack_ptr;
496+
int priv_stack_alloc_size;
422497

423498
/*
424499
* If we fail the final pass of JIT (from jit_subprogs),
@@ -432,6 +507,13 @@ void bpf_jit_free(struct bpf_prog *fp)
432507
}
433508
hdr = bpf_jit_binary_pack_hdr(fp);
434509
bpf_jit_binary_pack_free(hdr, NULL);
510+
priv_stack_ptr = fp->aux->priv_stack_ptr;
511+
if (priv_stack_ptr) {
512+
priv_stack_alloc_size = round_up(fp->aux->stack_depth, 16) +
513+
2 * PRIV_STACK_GUARD_SZ;
514+
priv_stack_check_guard(priv_stack_ptr, priv_stack_alloc_size, fp);
515+
free_percpu(priv_stack_ptr);
516+
}
435517
WARN_ON_ONCE(!bpf_prog_kallsyms_verify_off(fp));
436518
}
437519

@@ -453,6 +535,11 @@ bool bpf_jit_supports_kfunc_call(void)
453535
return IS_ENABLED(CONFIG_PPC64);
454536
}
455537

538+
bool bpf_jit_supports_private_stack(void)
539+
{
540+
return IS_ENABLED(CONFIG_PPC64);
541+
}
542+
456543
bool bpf_jit_supports_arena(void)
457544
{
458545
return IS_ENABLED(CONFIG_PPC64);

arch/powerpc/net/bpf_jit_comp64.c

Lines changed: 28 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -183,6 +183,24 @@ void bpf_jit_realloc_regs(struct codegen_context *ctx)
183183
{
184184
}
185185

186+
static void emit_fp_priv_stack(u32 *image, struct codegen_context *ctx)
187+
{
188+
PPC_LI64(bpf_to_ppc(BPF_REG_FP), (__force long)ctx->priv_sp);
189+
/*
190+
* Load base percpu pointer of private stack allocation.
191+
* Runtime per-cpu address = (base + data_offset) + (guard + stack_size)
192+
*/
193+
#ifdef CONFIG_SMP
194+
/* Load percpu data offset */
195+
EMIT(PPC_RAW_LD(bpf_to_ppc(TMP_REG_1), _R13,
196+
offsetof(struct paca_struct, data_offset)));
197+
EMIT(PPC_RAW_ADD(bpf_to_ppc(BPF_REG_FP),
198+
bpf_to_ppc(TMP_REG_1), bpf_to_ppc(BPF_REG_FP)));
199+
#endif
200+
EMIT(PPC_RAW_ADDI(bpf_to_ppc(BPF_REG_FP), bpf_to_ppc(BPF_REG_FP),
201+
PRIV_STACK_GUARD_SZ + round_up(ctx->priv_stack_size, 16)));
202+
}
203+
186204
/*
187205
* For exception boundary & exception_cb progs:
188206
* return increased size to accommodate additional NVRs.
@@ -307,9 +325,16 @@ void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx)
307325
* Exception_cb not restricted from using stack area or arena.
308326
* Setup frame pointer to point to the bpf stack area
309327
*/
310-
if (bpf_is_seen_register(ctx, bpf_to_ppc(BPF_REG_FP)))
311-
EMIT(PPC_RAW_ADDI(bpf_to_ppc(BPF_REG_FP), _R1,
312-
STACK_FRAME_MIN_SIZE + ctx->stack_size));
328+
if (bpf_is_seen_register(ctx, bpf_to_ppc(BPF_REG_FP))) {
329+
if (ctx->priv_sp) {
330+
/* Set up fp in private stack */
331+
emit_fp_priv_stack(image, ctx);
332+
} else {
333+
/* Setup frame pointer to point to the bpf stack area */
334+
EMIT(PPC_RAW_ADDI(bpf_to_ppc(BPF_REG_FP), _R1,
335+
STACK_FRAME_MIN_SIZE + ctx->stack_size));
336+
}
337+
}
313338

314339
if (ctx->arena_vm_start)
315340
PPC_LI64(bpf_to_ppc(ARENA_VM_START), ctx->arena_vm_start);

0 commit comments

Comments
 (0)