在安全研究领域,我们经常遇到经过精心混淆或加密的恶意代码样本。这些样本往往会在运行时解密出真正的恶意载荷,传统的静态分析方法对此束手无策。本文将介绍如何利用Python和Unicorn Engine构建一个动态分析工具:样本代码只在仿真的CPU中运行,而不会在真实主机上执行,从而安全地解密并分析内存中的Shellcode。
Unicorn Engine是一个轻量级的多架构CPU仿真框架,基于QEMU开发但提供了更简洁的API和更强大的功能。它支持包括x86、ARM、MIPS等在内的多种架构,特别适合用于逆向工程和安全研究。
安装Python绑定非常简单:
pip install unicorn capstone
这里同时安装了Capstone反汇编引擎,后续分析中会用到。
假设我们有一个恶意软件样本,使用PEiD或Detect It Easy等工具检测发现它使用了某种加壳技术。通过静态分析,我们定位到了疑似Shellcode解密例程的代码段:
.text:00401020 55 push ebp
.text:00401021 8B EC mov ebp, esp
.text:00401023 81 EC 00 01 00 00 sub esp, 100h
.text:00401029 60 pusha
.text:0040102A BE 00 20 40 00 mov esi, offset encrypted_data
.text:0040102F BF 00 30 40 00 mov edi, offset shellcode_buffer
.text:00401034 B9 00 01 00 00 mov ecx, 100h
.text:00401039 F3 A5 rep movsd
这段代码看起来是将加密数据复制到缓冲区,但实际解密过程可能隐藏在后续指令中。
我们需要创建一个x86架构的仿真器实例:
from capstone import CS_ARCH_X86, CS_MODE_32, Cs
from unicorn import *
from unicorn.x86_const import *
# Layout of the emulated address space.
MEM_BASE = 0x400000
MEM_SIZE = 0x200000  # 2 MB
# NOTE(review): the stack occupies page 0, so a NULL-pointer access made by
# the sample lands in mapped stack memory instead of faulting -- confirm
# that this is intended.
STACK_BASE = 0x0
STACK_SIZE = 0x10000

# Create a 32-bit x86 emulator and map the image region and the stack.
mu = Uc(UC_ARCH_X86, UC_MODE_32)
mu.mem_map(MEM_BASE, MEM_SIZE)
mu.mem_map(STACK_BASE, STACK_SIZE)

# ESP starts one dword below the top of the stack mapping.
mu.reg_write(UC_X86_REG_ESP, STACK_BASE + STACK_SIZE - 4)
将恶意代码加载到仿真内存中:
# Address the extracted code is loaded at, and where emulation will begin.
CODE_ADDR = 0x401000
ENTRY_POINT = 0x401020

# Assumes the code section has already been extracted from the sample into
# malware.bin.
with open("malware.bin", "rb") as sample_file:
    code = sample_file.read()

mu.mem_write(CODE_ADDR, code)
为了观察解密过程,我们需要设置几种Hook:
# Instruction-level hook
# One disassembler is shared by every callback invocation: this hook runs for
# each emulated instruction, so constructing a new Cs object per call is
# needless work on the hottest path.
_DISASSEMBLER = Cs(CS_ARCH_X86, CS_MODE_32)


def hook_code(mu, address, size, user_data):
    """Print a trace line and the disassembly of the current instruction.

    Args:
        mu: Emulator instance the hook is attached to.
        address: Address of the instruction reported by the emulator.
        size: Length of that instruction in bytes.
        user_data: Unused; present to match the hook callback signature.
    """
    print(f"Executing at 0x{address:x}, size={size}")
    # Hand Capstone an immutable copy of the instruction bytes.
    insn_bytes = bytes(mu.mem_read(address, size))
    for insn in _DISASSEMBLER.disasm(insn_bytes, address):
        print(f"0x{insn.address:x}:\t{insn.mnemonic}\t{insn.op_str}")
# Memory-access hook
def hook_mem(mu, access, address, size, value, user_data):
    """Log memory reads and writes performed by the emulated code.

    Args:
        mu: Emulator instance the hook is attached to.
        access: Access type; UC_MEM_WRITE and UC_MEM_READ are handled, any
            other type is ignored.
        address: Address being accessed.
        size: Width of the access in bytes.
        value: Integer being stored; only used for writes.
        user_data: Unused; present to match the hook callback signature.
    """
    if access == UC_MEM_WRITE:
        # `value` is an integer, not a buffer: bytes(value) would build
        # `value` zero bytes (or raise for a negative value) rather than
        # the data written. Mask it to the access width and serialise it
        # little-endian, i.e. in the order x86 stores it in memory.
        width_mask = (1 << (size * 8)) - 1
        raw = (value & width_mask).to_bytes(size, "little")
        print(f"Memory WRITE at 0x{address:x}, data={raw.hex()}")
    elif access == UC_MEM_READ:
        print(f"Memory READ at 0x{address:x}, size={size}")
# Install the instruction hook and a single hook covering reads and writes.
_MEM_ACCESS_HOOKS = UC_HOOK_MEM_READ | UC_HOOK_MEM_WRITE
mu.hook_add(UC_HOOK_CODE, hook_code)
mu.hook_add(_MEM_ACCESS_HOOKS, hook_mem)
现在我们可以开始仿真执行解密过程:
# Source and destination of the copy, and the REP MOVSD iteration count.
ENCRYPTED_DATA_ADDR = 0x402000
SHELLCODE_BUFFER = 0x403000
DWORD_COUNT = 0x100

try:
    # Register state expected by the routine before it runs.
    mu.reg_write(UC_X86_REG_ESI, ENCRYPTED_DATA_ADDR)
    mu.reg_write(UC_X86_REG_EDI, SHELLCODE_BUFFER)
    mu.reg_write(UC_X86_REG_ECX, DWORD_COUNT)
    # Emulate from the entry point to the end of the loaded code.
    mu.emu_start(ENTRY_POINT, CODE_ADDR + len(code))
    # REP MOVSD moves ECX doublewords, so the buffer holds DWORD_COUNT * 4
    # bytes; reading only DWORD_COUNT bytes would drop three quarters of it.
    decrypted = mu.mem_read(SHELLCODE_BUFFER, DWORD_COUNT * 4)
    with open("decrypted.bin", "wb") as f:
        f.write(decrypted)
except UcError as e:
    print(f"仿真错误: {e}")
对于复杂的解密算法,我们可以设置条件断点:
def hook_code_conditional(mu, address, size, user_data):
    """Stop emulation when an instruction is reached while ECX equals 0x40.

    Args:
        mu: Emulator instance the hook is attached to.
        address: Address of the current instruction.
        size: Unused.
        user_data: Unused.
    """
    if mu.reg_read(UC_X86_REG_ECX) != 0x40:
        return
    print(f"条件触发在0x{address:x}, ECX=0x40")
    mu.emu_stop()


mu.hook_add(UC_HOOK_CODE, hook_code_conditional)
将上述功能封装成自动化分析工具:
class ShellcodeAnalyzer:
    """Single-stepping emulator wrapper with address breakpoints."""

    # Unicorn maps memory in whole 4 KiB pages: both the address and the
    # size passed to mem_map must be multiples of this value.
    _PAGE_SIZE = 0x1000

    def __init__(self, arch=UC_ARCH_X86, mode=UC_MODE_32):
        """Create the emulator and an empty breakpoint set."""
        self.mu = Uc(arch, mode)
        self.breakpoints = set()

    def load_code(self, code, base_addr):
        """Map memory for *code*, copy it to *base_addr* and set the entry.

        The mapping covers the code plus at least one page of slack, and is
        widened to page boundaries so that any code length or base address
        is accepted. EIP is pointed at *base_addr* so that run() starts at
        the loaded code rather than at address 0.
        """
        page = self._PAGE_SIZE
        map_start = base_addr & ~(page - 1)
        # Round the end (code + one slack page) up to the next boundary.
        map_end = (base_addr + len(code) + page + page - 1) & ~(page - 1)
        self.mu.mem_map(map_start, map_end - map_start)
        self.mu.mem_write(base_addr, code)
        self.entry = base_addr
        self.mu.reg_write(UC_X86_REG_EIP, base_addr)

    def add_breakpoint(self, addr):
        """Register *addr* as a breakpoint address."""
        self.breakpoints.add(addr)

    def run(self):
        """Single-step from the current EIP until a breakpoint or an error.

        The breakpoint check happens before each step, so calling run()
        again while EIP still sits on a breakpoint returns immediately.
        The loop has no other exit: it ends only when a breakpoint is hit
        or the emulator raises UcError.
        """
        try:
            while True:
                pc = self.mu.reg_read(UC_X86_REG_EIP)
                if pc in self.breakpoints:
                    print(f"断点命中: 0x{pc:x}")
                    break
                # count=1 limits each emu_start call to one instruction.
                self.mu.emu_start(pc, pc + 1, count=1)
        except UcError as e:
            print(f"执行停止: {e}")
对于大型样本,仿真速度可能成为瓶颈。一个简单的优化是用块级Hook代替指令级Hook:回调只在进入每个基本块时触发一次,而不是每条指令触发一次。示例如下:
# Block-level hook example
def hook_block(mu, address, size, user_data):
    """Print the address and size reported for a basic block.

    Args:
        mu: Unused.
        address: Start address of the block.
        size: Size of the block as reported by the emulator.
        user_data: Unused.
    """
    message = f"执行基本块 at 0x{address:x}, size={size}"
    print(message)


mu.hook_add(UC_HOOK_BLOCK, hook_block)
我们分析一个使用简单异或加密的样本:
# Encrypted payload
encrypted_data = bytes.fromhex("31c9f7e1b00b51682f2f7368682f62696e89e3cd8031c931db40cd80")


# Decryption routine
def xor_decrypt(data, key):
    """Return *data* XORed with *key*.

    Args:
        data: Bytes-like object (or iterable of ints 0-255) to transform.
        key: Either a single-byte integer key, or a bytes-like multi-byte
            key that is repeated cyclically over the data.

    Returns:
        A bytes object of the same length as *data*.

    Raises:
        ValueError: If a bytes-like key is empty, or if an integer key
            produces a value outside 0-255.
    """
    if isinstance(key, int):
        return bytes(b ^ key for b in data)
    key = bytes(key)
    if not key:
        raise ValueError("XOR key must not be empty")
    key_len = len(key)
    return bytes(b ^ key[i % key_len] for i, b in enumerate(data))
# Emulator-side implementation
def hook_code_xor(mu, address, size, user_data):
    """Perform the XOR decryption in Python when DECRYPT_FUNC is reached.

    Reads the source address, destination address, length and key from
    ESI, EDI, ECX and AL, writes the decrypted bytes to the destination,
    then sets EIP to the address following the current instruction.

    Args:
        mu: Emulator instance the hook is attached to.
        address: Address of the current instruction.
        size: Length of the current instruction in bytes.
        user_data: Unused.
    """
    if address != DECRYPT_FUNC:
        return

    # Decryption parameters; the key is assumed to be held in AL.
    src = mu.reg_read(UC_X86_REG_ESI)
    dst = mu.reg_read(UC_X86_REG_EDI)
    length = mu.reg_read(UC_X86_REG_ECX)
    key = mu.reg_read(UC_X86_REG_AL)

    # Decrypt the source buffer and store the result at the destination.
    mu.mem_write(dst, xor_decrypt(mu.mem_read(src, length), key))

    # NOTE(review): address + size is the next instruction, so only the
    # instruction at DECRYPT_FUNC itself is skipped -- confirm that this is
    # enough to bypass the whole decryption loop.
    mu.reg_write(UC_X86_REG_EIP, address + size)
对于更复杂的加密算法如AES,我们可以结合Python的加密库:
from Crypto.Cipher import AES


def hook_aes_decrypt(mu, address, size, user_data):
    """Perform AES-CBC decryption in Python when AES_DECRYPT_FUNC is reached.

    The key and IV pointers are taken from EBX and EDX (16 bytes each), the
    data pointer and length from ESI and ECX. The plaintext overwrites the
    ciphertext in place, and the emulated function is then skipped by
    returning straight to its caller.

    Args:
        mu: Emulator instance the hook is attached to.
        address: Address of the current instruction.
        size: Unused.
        user_data: Unused.
    """
    if address != AES_DECRYPT_FUNC:
        return

    # Key and IV
    key_addr = mu.reg_read(UC_X86_REG_EBX)
    iv_addr = mu.reg_read(UC_X86_REG_EDX)
    key = bytes(mu.mem_read(key_addr, 16))
    iv = bytes(mu.mem_read(iv_addr, 16))

    # Ciphertext
    data_addr = mu.reg_read(UC_X86_REG_ESI)
    data_len = mu.reg_read(UC_X86_REG_ECX)
    ciphertext = bytes(mu.mem_read(data_addr, data_len))

    # Decrypt and write the result back over the ciphertext.
    cipher = AES.new(key, AES.MODE_CBC, iv)
    plaintext = cipher.decrypt(ciphertext)
    mu.mem_write(data_addr, plaintext)

    # Skip the function by emulating its RET. EBP + 4 is a stack address,
    # not a code address, so writing it to EIP would send execution into
    # the stack. Instead pop the return address that the CALL pushed.
    # Assumes AES_DECRYPT_FUNC is the function's first instruction (so the
    # return address is at [ESP]) and that the caller owns any stack
    # arguments -- TODO confirm against the sample.
    esp = mu.reg_read(UC_X86_REG_ESP)
    return_addr = int.from_bytes(mu.mem_read(esp, 4), "little")
    mu.reg_write(UC_X86_REG_ESP, esp + 4)
    mu.reg_write(UC_X86_REG_EIP, return_addr)
可以将Unicorn仿真结果导入IDA Pro进行分析:
import idc


def import_decrypted_code(decrypted, va):
    """Patch decrypted bytes into the IDA database and re-analyse them.

    Args:
        decrypted: Bytes-like object holding the decrypted code.
        va: Address in the database at which the bytes are written.
    """
    # Undefine the existing items. del_items takes (ea, flags, nbytes), so
    # the length must be the third argument; passed second it would be
    # interpreted as the flags value and only one item would be undefined.
    idc.del_items(va, idc.DELIT_SIMPLE, len(decrypted))
    # Write the decrypted bytes.
    for offset, byte in enumerate(decrypted):
        idc.patch_byte(va + offset, byte)
    # Create an instruction at the start and wait for auto-analysis.
    idc.create_insn(va)
    idc.auto_wait()
整合各种功能构建完整分析工具:
class MalwareAnalyzer:
    """Load a raw code sample, emulate it and dump emulated memory.

    Expected call order: load_sample(), setup_emulator(), optionally
    add_hook(), then analyze() and dump_memory().
    """

    # Region holding the sample and the address the code is written to.
    IMAGE_BASE = 0x400000
    IMAGE_SIZE = 2 * 1024 * 1024
    CODE_ADDR = 0x401000
    # Stack region, placed below the image so the two do not overlap.
    STACK_BASE = 0x100000
    STACK_SIZE = 0x10000

    def __init__(self):
        """Initialise an analyzer with no sample and no emulator."""
        self.mu = None
        self.arch = UC_ARCH_X86
        self.mode = UC_MODE_32
        self.hooks = []
        self.code = None

    def _require_emulator(self):
        """Raise RuntimeError unless setup_emulator() has been called."""
        if self.mu is None:
            raise RuntimeError("setup_emulator() must be called first")

    def load_sample(self, filename):
        """Read the raw sample bytes from *filename* into self.code."""
        with open(filename, "rb") as f:
            self.code = f.read()

    def setup_emulator(self):
        """Create the emulator, map memory and load the sample.

        Raises:
            RuntimeError: If no sample has been loaded yet.
        """
        if self.code is None:
            raise RuntimeError("load_sample() must be called first")
        self.mu = Uc(self.arch, self.mode)
        self.mu.mem_map(self.IMAGE_BASE, self.IMAGE_SIZE)
        self.mu.mem_write(self.CODE_ADDR, self.code)
        # Without a mapped stack ESP stays at its reset value and the first
        # push or call made by the sample faults on unmapped memory.
        self.mu.mem_map(self.STACK_BASE, self.STACK_SIZE)
        self.mu.reg_write(
            UC_X86_REG_ESP, self.STACK_BASE + self.STACK_SIZE - 4
        )

    def add_hook(self, hook_type, callback):
        """Register *callback* for *hook_type* and remember its handle.

        Raises:
            RuntimeError: If the emulator has not been set up.
        """
        self._require_emulator()
        h = self.mu.hook_add(hook_type, callback)
        self.hooks.append(h)

    def analyze(self):
        """Emulate from the code address to the end of the loaded sample.

        A UcError raised by the emulator is reported and swallowed.

        Raises:
            RuntimeError: If the emulator has not been set up.
        """
        self._require_emulator()
        try:
            self.mu.emu_start(self.CODE_ADDR, self.CODE_ADDR + len(self.code))
        except UcError as e:
            print(f"分析完成: {e}")

    def dump_memory(self, addr, size, filename):
        """Write *size* bytes of emulated memory at *addr* to *filename*.

        Raises:
            RuntimeError: If the emulator has not been set up.
        """
        self._require_emulator()
        data = self.mu.mem_read(addr, size)
        with open(filename, "wb") as f:
            f.write(data)
在实际分析中,我发现设置合适的内存映射范围对分析成功至关重要。过小的内存空间会导致访问越界,而过大的空间则会降低仿真效率。通常我会先静态分析样本的内存访问模式,再设置相应的映射区域。