使用ChatGPT对CTF题目快速逆向
2023-1-12 15:3:48 Author: BeFun安全实验室(查看原文) 阅读量:16 收藏

在CTF逆向过程中,遇到规模比较大的函数时,想快速理解其功能并不容易,这时可以把反编译代码丢给ChatGPT去看。本文以2022年之江杯初赛的一道逆向题目为例,看看ChatGPT的分析能力。

题目下载:

https://github.com/Inv0k3r/pwnable_files/raw/master/vrun2.exe

人工逆向

先用IDA和Ghidra反编译一下,因为代码太长,这里只贴其中一部分:

/*
 * func3 -- multi-word addition with carry (IDA decompiler output; the local
 * variable declarations are not part of this excerpt).
 *
 * v4 walks the 32-bit words of a2, starting one word in (a2 + 4).  Because
 * v6 = a1 - a2 and v7 = a3 - a2, the expressions (char *)v4 + v6 + k and
 * (char *)v4 + v7 + k address the words at the same offset in a1 and a3.
 *
 * Every step forms  carry_in + word_of_a2 + word_of_a1, stores the value
 * through an `unsigned int *` into a3, and uses (sum > 0xFFFFFFFF) as the
 * carry into the next step.  The temporaries are presumably 64-bit, since
 * they are compared against 0xFFFFFFFF -- TODO confirm against the omitted
 * declarations.
 *
 * One pass of the do/while handles 16 words (v4 += 16) and v8 = 2 makes the
 * loop run twice, i.e. 32 words in total.  v5 carries the last carry of a
 * pass into the first word of the next pass.
 *
 * NOTE(review): `result` is assigned from v24 > 0xFFFFFFFF, the carry out of
 * the next-to-last word of a pass, not from v5 (the carry out of the last
 * word).  That is what the decompiler shows; verify before relying on the
 * return value.
 */
_BOOL8 __fastcall func3(__int64 a1, __int64 a2, __int64 a3){  v4 = (unsigned int *)(a2 + 4);  v5 = 0i64;  v6 = a1 - a2;  v7 = a3 - a2;  v8 = 2i64;  do  {    v9 = v5 + *(unsigned int *)((char *)v4 + v6 - 4) + *(v4 - 1);    *(unsigned int *)((char *)v4 + v7 - 4) = v9;    v10 = *v4 + (v9 > 0xFFFFFFFF) + (unsigned __int64)*(unsigned int *)((char *)v4 + v6);    *(unsigned int *)((char *)v4 + v7) = v10;    v11 = (v10 > 0xFFFFFFFF) + v4[1] + (unsigned __int64)*(unsigned int *)((char *)v4 + v6 + 4);    *(unsigned int *)((char *)v4 + v7 + 4) = v11;    v12 = (v11 > 0xFFFFFFFF) + v4[2] + (unsigned __int64)*(unsigned int *)((char *)v4 + v6 + 8);    *(unsigned int *)((char *)v4 + v7 + 8) = v12;    v13 = (v12 > 0xFFFFFFFF) + v4[3] + (unsigned __int64)*(unsigned int *)((char *)v4 + v6 + 12);    *(unsigned int *)((char *)v4 + v7 + 12) = v13;    v14 = (v13 > 0xFFFFFFFF) + v4[4] + (unsigned __int64)*(unsigned int *)((char *)v4 + v6 + 16);    *(unsigned int *)((char *)v4 + v7 + 16) = v14;    v15 = (v14 > 0xFFFFFFFF) + v4[5] + (unsigned __int64)*(unsigned int *)((char *)v4 + v6 + 20);    *(unsigned int *)((char *)v4 + v7 + 20) = v15;    v16 = (v15 > 0xFFFFFFFF) + v4[6] + (unsigned __int64)*(unsigned int *)((char *)v4 + v6 + 24);    *(unsigned int *)((char *)v4 + v7 + 24) = v16;    v17 = (v16 > 0xFFFFFFFF) + v4[7] + (unsigned __int64)*(unsigned int *)((char *)v4 + v6 + 28);    *(unsigned int *)((char *)v4 + v7 + 28) = v17;    v18 = (v17 > 0xFFFFFFFF) + v4[8] + (unsigned __int64)*(unsigned int *)((char *)v4 + v6 + 32);    *(unsigned int *)((char *)v4 + v7 + 32) = v18;    v19 = v4[9] + (unsigned __int64)*(unsigned int *)((char *)v4 + v6 + 36);    v4 += 16;    v20 = (v18 > 0xFFFFFFFF) + v19;    *(unsigned int *)((char *)v4 + v7 - 28) = v20;    v21 = (v20 > 0xFFFFFFFF) + *(v4 - 6) + (unsigned __int64)*(unsigned int *)((char *)v4 + v6 - 24);    *(unsigned int *)((char *)v4 + v7 - 24) = v21;    v22 = (v21 > 0xFFFFFFFF) + *(v4 - 5) + (unsigned __int64)*(unsigned int *)((char *)v4 + v6 - 20);  
  *(unsigned int *)((char *)v4 + v7 - 20) = v22;    v23 = (v22 > 0xFFFFFFFF) + *(v4 - 4) + (unsigned __int64)*(unsigned int *)((char *)v4 + v6 - 16);    *(unsigned int *)((char *)v4 + v7 - 16) = v23;    v24 = (v23 > 0xFFFFFFFF) + *(v4 - 3) + (unsigned __int64)*(unsigned int *)((char *)v4 + v6 - 12);    *(unsigned int *)((char *)v4 + v7 - 12) = v24;    result = v24 > 0xFFFFFFFF;    v26 = result + *(v4 - 2) + (unsigned __int64)*(unsigned int *)((char *)v4 + v6 - 8);    *(unsigned int *)((char *)v4 + v7 - 8) = v26;    v5 = v26 > 0xFFFFFFFF;    --v8;  }  while ( v8 );  return result;}

可以看出来代码很乱,可能有经验的逆向赛棍可以一眼看出是大数算法,但是我被里面的两个硬编码的字符串误导了一下,误以为是某种使用密钥的加密算法,最后结合调试查看数据才理清计算流程,这是我当时写的解法:

import time


# Reproduce the computation performed by the binary.
def calc(str1, str2):
    """Multiply two hex strings the way the binary does, word by word.

    Each argument is read as four consecutive 8-hex-digit (32-bit) words,
    most significant word first; only the first 32 characters are used.
    Every word of ``str1`` is multiplied with every word of ``str2`` and the
    partial products are combined by position, which equals the ordinary
    product of the two 128-bit values.

    The product is printed in hex and also returned as an ``int``.

    Raises:
        ValueError: if one of the four 8-character slices is empty or is
            not valid hexadecimal.
    """
    word = 0x100000000  # 2**32, the radix of one word

    nums1 = [int('0x' + str1[i * 8:(i + 1) * 8], 16) for i in range(4)]
    nums2 = [int('0x' + str2[i * 8:(i + 1) * 8], 16) for i in range(4)]

    # Horner evaluation in base 2**32: the inner loop builds one row
    # (one word of str1 times all of str2), the outer loop shifts and sums
    # the rows.
    result = 0
    for left in nums1:
        row = 0
        for right in nums2:
            row = row * word + left * right
        result = result * word + row

    print(hex(result))
    return result
# Alternative probe input; the reference values a, b, c further down appear
# to have been recorded with it (TODO confirm).
# input1 = 'AAAAAAAABBBBBBBBCCCCCCCCDDDDDDDD'
input1 = 'f34857597362863874859743a772cd73'
input2 = '18975633241537485357262533468472'

# The program computes input * input - key * input.
a = calc(input1, input1)
b = calc(input2, input1)
print(hex(a - b))

# Values observed while debugging (c == a - b) and the constant the program
# compares its result against.
a = 0x71c71c71ddddddddf5c28f5c41fdb9740c83fb72cf13579bc3b2a19061d950c9
b = 0x1064e42219b1d544b312672109a6b99ae5003bd1834d750faa1f926fe4a0c06a
c = 0x6162384fc42c089942b0283b3856ffd92783bfa14bc5e28c19930f207d38905f
real_c = 0xcfd3d07e418bc8e081e32ed7195f942021834b00f7244eb73df68550cfcc9873

from z3 import *


def _horner(terms):
    """Combine terms as base-2**32 digits, most significant first."""
    acc = terms[0]
    for term in terms[1:]:
        acc = acc * 0x100000000 + term
    return acc


def _mul_words(xs, ys):
    """Word-by-word product of two 4-word numbers (ints or z3 terms)."""
    return _horner([_horner([x * y for y in ys]) for x in xs])


s = Solver()
# The four unknown 32-bit words of the input, most significant first.
a = Int('a')
b = Int('b')
c = Int('c')
d = Int('d')
unknown_words = (a, b, c, d)
key_words = (0x18975633, 0x24153748, 0x53572625, 0x33468472)

# input * input - key * input must equal the target constant.
s.add(_mul_words(unknown_words, unknown_words)
      - _mul_words(key_words, unknown_words) == real_c)
for w in unknown_words:
    s.add(w > 0)
for w in unknown_words:
    s.add(w < 0x100000000)

# Only read the model when the solver actually found one; otherwise
# `result` would be unbound.
if s.check() == sat:
    result = s.model()
    print(result)

做完了才意识到是一个大数算法。

chatGPT

但是如果我们分别将几个函数的反编译丢给chatGPT,结果如下:

ChatGPT一眼就看出来是大数运算。再结合简单的调试可知,程序主要是把我们输入的32位十六进制字符串(即一个128比特的整数)做如下运算,并与目标值比较:

input * input - input * 0x18975633241537485357262533468472 = 0xcfd3d07e418bc8e081e32ed7195f942021834b00f7244eb73df68550cfcc9873
简简单单一元二次方程,直接公式法或者丢给z3:
import math

# Coefficients of a*x**2 + b*x + c == 0, taken from
# input * input - input * key == target.
a = 1
b = -0x18975633241537485357262533468472
c = -0xcfd3d07e418bc8e081e32ed7195f942021834b00f7244eb73df68550cfcc9873

# math.isqrt works on exact integers. math.sqrt would round the ~250-bit
# discriminant to a 53-bit float and corrupt the low bits of the root.
root = (-b + math.isqrt(b * b - 4 * a * c)) // (2 * a)
print(hex(root))

# Optional cross-check with z3; the closed form above does not need it.
try:
    from z3 import Int, Solver, sat
except ImportError:
    print('z3 is not installed; skipping the solver cross-check')
else:
    s = Solver()
    # Separate Python name so the coefficient `a` above is not shadowed.
    unknown = Int('a')
    s.add(unknown * unknown + unknown * b + c == 0)
    s.add(unknown > 0)
    if s.check() == sat:
        print(s.model())

ghidra与ida与魔法

实际上,上面让ChatGPT推测功能时输入的是Ghidra反编译的代码。它虽然和IDA反编译出来的代码差不多,但是IDA的代码丢进去之后并没有推测出功能,而Ghidra反编译的代码就可以推测出上面的大数算法。

然后我找了一下之前的ida chatgpt插件,使用插件的变量自动重命名,再使用功能推测,ida也成功推测出了函数的功能:

但是另外几个函数就不太行了:

所以我魔改了一下插件,利用selenium直接启动一个Chrome去跟ChatGPT交互(见文章末尾),这样就可以反复提出不同要求来微调结果,并且可以发多段代码,让它从整体的角度去分析功能。比如先给它发前面可以分析出大数乘法的代码,在它分析出大数乘法之后,再发送大数加法的代码,它就可以分析出来了:

而直接发送大数加法的代码就不一定能分析出来:

魔改的ChatGPT插件

地址:

https://github.com/Inv0k3r/Gepetto-ChatGPT

主要是修改了原版使用的付费api为免费的聊天栏,然后加了手动登录(防止每次打开IDA都要登录)和仅发送反编译代码的功能(便于微调)。

装好插件后,在里面配置一下账号、密码、代理以及自动过验证码,再确保本机装了Chrome就可以用了。

听说最近chatgpt在推高级版了,希望有个更好用的接口,目前这个魔改插件作为临时使用还可以,如果有更好的接口应该可以实现一些更有意思的功能。

  • Ctrl+Alt+L 打开chrome登录chatgpt
  • Ctrl+Alt+S 发送当前反编译的C代码
  • Ctrl+Alt+G 要求ChatGPT对代码进行分析解释
  • Ctrl+Alt+R 重命名变量


文章来源: http://mp.weixin.qq.com/s?__biz=MzI3NDEzMDgzNw==&mid=2247484572&idx=1&sn=78484f08ecc889b95e81d9fca5e57bbd&chksm=eb19f666dc6e7f700c1af936d7e54a907b18c9b5058b06efbb2ee262722944df47306db7d552#rd
如有侵权请联系:admin#unsafe.sh