pyc花指令
常見的python花指令形式有兩種:單重疊指令和多重疊指令。
以下以python3.8為例,指令長度為2位元組。
單重疊指令:
例如pyc經過反編譯後得到的東西為
0 JUMP_ABSOLUTE [71 04] 5
2 PRINT_ITEM [47 --]
4 LOAD_CONST [64 10] 16
6 STOP_CODE [00 --]
實際在執行時,並不會執行 2 PRINT_ITEM [47 --],真正被執行的指令序列為:
0 JUMP_ABSOLUTE [71 04] 5
4 LOAD_CONST [64 10] 16
單重疊指令多是分支的跳轉,導致一些反編譯工具如pycdc、uncompyle6出錯。
多重疊指令:
0 EXTENDED_ARG [91 64]
2 EXTENDED_ARG [91 53]
4 JUMP_ABSOLUTE [71 01]
實際執行時
0 EXTENDED_ARG [91 64]
2 EXTENDED_ARG [91 53]
4 JUMP_ABSOLUTE [71 02]
1 LOAD_CONST [64 91]
3 RETURN_VALUE [53 --]
多重疊指令是將指令的資料部分當作下一條指令的opcode部分執行,在跳轉基礎上進一步混淆控制流的技術手段,可以有效對抗逆向者。
NOP花指令:
NOP為junk code,只要不影響正常執行邏輯,其他的指令可自由發揮,含有NOP的pyc均不可以被現有的反編譯工具反編譯成py程式碼。
去除花指令
pyc去除花指令後,很大可能還是不能被現有工具反編譯成原始碼,因為現有反編譯工具對pyc要求比較嚴格,不能有nop以及其他junk指令,但程式執行時python虛擬機器卻沒有這樣的限制。
因此不同於用ida patch 彙編程式碼,想把patch過的pyc反編譯回原來的原始碼,工作量還是蠻大的。
下面以[2022年安洵杯]flower.pyc為例
626 LOAD_GLOBAL 6: ord
628 LOAD_GLOBAL 18: Base64Table
630 LOAD_FAST 3: i
632 LOAD_CONST 22: 22
634 BINARY_XOR
636 BINARY_SUBSCR
638 CALL_FUNCTION 1
640 STORE_FAST 15: tmp2
這一段是把base64[i]改為了base64[i^22]
258 LOAD_NAME 16: ret
260 LOAD_NAME 18: i
262 LOAD_NAME 18: i
264 LOAD_CONST 40: 4
266 BINARY_ADD
268 BUILD_SLICE 2
270 BINARY_SUBSCR
272 LOAD_NAME 19: Key1
274 LOAD_NAME 17: j
276 STORE_SUBSCR
278 LOAD_NAME 17: j
280 LOAD_CONST 41: 1
282 BINARY_ADD
284 STORE_NAME 17: j
286 LOAD_NAME 18: i
288 LOAD_CONST 40: 4
290 BINARY_ADD
292 STORE_NAME 18: i
294 LOAD_NAME 17: j
296 LOAD_CONST 42: 10
298 COMPARE_OP 2 (==)
300 POP_JUMP_IF_FALSE 258
304 JUMP_ABSOLUTE 312
308 JUMP_ABSOLUTE 258
轉化成py程式碼就是
input_str = input()
ret = My_base64_encode(input_str)
j = 0
i = 0
Key1 = "1234512345"
len_ret = len(ret) // 4
while j != 10:
Key1[j] = ret[i:i+4]
j = j + 1
i = i + 4
keyCheck = ''
if keyCheck[0] == keyInputCom[8]:
然後後面有一堆重複的,提取出來就是
0 == 8
1 == 9
2 == 1
3 == 7
4 == 5
5 == 0
6 == 6
7 == 4
8 == 3
9 == 2
然後再寫題解的程式碼就可以了。
利用指令碼去除花指令:用python模擬執行python的opcode,遇到分支就跳轉,直到遇到RETURN_VALUE才停止本次執行,採用的是簡單的DFS遞迴演算法。下面有兩份指令碼:第一份使用寫死的opcode編號並以3位元組(opcode加2位元組引數)為單位解析指令,第二份透過opcode模組查詢opcode編號並以2位元組為單位解析指令。
import marshal, sys, opcode, types, dis
# Hard-coded opcode numbers used by deconf_inner() / deconf() below.
# NOTE(review): the values look like the CPython 2.7 opcode table -- confirm
# they match the interpreter that produced the target .pyc.
NOP = 9  # filler byte deconf() writes over every offset that was never reached
HAVE_ARGUMENT = 90  # opcodes >= this value are decoded with a 2-byte operand
JUMP_FORWARD = 110  # operand is added to the offset of the next instruction
JUMP_IF_FALSE_OR_POP = 111  # operand is used as an absolute target
JUMP_IF_TRUE_OR_POP = 112  # operand is used as an absolute target
JUMP_ABSOLUTE = 113  # operand is used as an absolute target, no fall-through
POP_JUMP_IF_FALSE = 114  # operand is used as an absolute target
POP_JUMP_IF_TRUE = 115  # operand is used as an absolute target
CONTINUE_LOOP = 119  # operand is used as an absolute target
FOR_ITER = 93  # operand is added to the offset of the next instruction
RETURN_VALUE = 83  # terminates the path being walked
# Byte offsets reached by deconf_inner(); deconf() replaces it with a fresh
# set before walking each code object.
used_set = set()
def deconf_inner(code, now):
    """Mark every bytecode byte reachable from offset ``now`` in ``used_set``.

    The instruction stream is walked the way it would be executed:
    ``JUMP_FORWARD`` and ``JUMP_ABSOLUTE`` are followed, while conditional
    jumps, ``CONTINUE_LOOP`` and ``FOR_ITER`` explore the jump target first
    and the fall-through afterwards.  A path ends at ``RETURN_VALUE``, at an
    offset that is already marked, or when it leaves the bytecode.

    Instructions are decoded as one opcode byte, followed by a 16-bit
    little-endian operand when the opcode is >= ``HAVE_ARGUMENT``.

    Args:
        code: sequence of integer byte values of one code object.
        now: byte offset at which to start walking.

    Returns:
        None.  Reached offsets are added to the module-level ``used_set``.
    """
    global used_set
    size = len(code)
    # Opcodes whose operand is an absolute target and that can also fall
    # through to the next instruction.
    branch_ops = (JUMP_IF_TRUE_OR_POP, JUMP_IF_FALSE_OR_POP,
                  POP_JUMP_IF_TRUE, POP_JUMP_IF_FALSE, CONTINUE_LOOP)
    # Explicit work list instead of one recursive call per branch, so heavily
    # obfuscated functions cannot exhaust the recursion limit.  The
    # fall-through is pushed and the target walked first, which keeps the
    # same depth-first order as the recursive formulation.
    pending = [now]
    while pending:
        now = pending.pop()
        # The range check stops a path that runs off the end of the bytecode
        # (or jumps outside it) instead of raising IndexError.
        while 0 <= now < size and code[now] != RETURN_VALUE:
            if now in used_set:
                break
            op = code[now]
            used_set.add(now)
            if op < HAVE_ARGUMENT:
                now += 1
                continue
            used_set.add(now + 1)
            used_set.add(now + 2)
            if now + 2 >= size:
                # The operand is cut off by the end of the bytecode; there is
                # nothing left to decode on this path.
                now = size
                break
            arg = code[now + 2] << 8 | code[now + 1]
            if op == JUMP_FORWARD:
                now += arg + 3
            elif op == JUMP_ABSOLUTE:
                now = arg
            elif op in branch_ops:
                pending.append(now + 3)
                now = arg
            elif op == FOR_ITER:
                pending.append(now + 3)
                now += arg + 3
            else:
                now += 3
        # Mark the instruction the path stopped on (normally RETURN_VALUE).
        if 0 <= now < size:
            used_set.add(now)
            if code[now] >= HAVE_ARGUMENT:
                used_set.add(now + 1)
                used_set.add(now + 2)
def deconf(code):
    """Return ``code`` with every byte that is never reached replaced by NOP.

    The bytecode string is converted to a list of byte values, walked from
    offset 0 with ``deconf_inner`` and rebuilt with ``NOP`` written over each
    offset that did not end up in ``used_set``.  The length is unchanged.

    Args:
        code: bytecode of one code object, as a string of characters.

    Returns:
        The patched bytecode as a string of the same length.
    """
    global used_set
    used_set = set() #Remember to clean up used_set for every target function
    cod = [ord(ch) for ch in code]
    deconf_inner(cod, 0)
    patched = [value if offset in used_set else NOP
               for offset, value in enumerate(cod)]
    return "".join(map(chr, patched))
def _with_clean_bytecode(co, consts):
    """Return a copy of code object ``co`` whose bytecode went through deconf().

    Every other field is copied from ``co`` except the constants tuple, which
    is taken from ``consts`` so the caller can substitute patched nested code
    objects.
    """
    return types.CodeType(co.co_argcount,
                          # co.co_kwonlyargcount, Add this in Python3
                          co.co_nlocals,
                          co.co_stacksize,
                          co.co_flags,
                          deconf(co.co_code),
                          consts,
                          co.co_names,
                          co.co_varnames,
                          co.co_filename,
                          co.co_name,
                          co.co_firstlineno,
                          co.co_lnotab, # In general, You should adjust this
                          co.co_freevars,
                          co.co_cellvars)


# Read the pyc named on the command line: the first 8 bytes are kept verbatim
# as the header, the rest is the marshalled top-level code object.
with open(sys.argv[1], 'rb') as f:
    header = f.read(8)
    code = marshal.load(f)
print(code.co_consts,type(code))
# Debug aid: disassemble one cleaned nested code object.
# print(dis.dis(deconf(code.co_consts[3].co_code)))

# Clean the code objects stored directly in the module's constants; nested
# code objects deeper than that are left as they are.
consts = list()
for const in code.co_consts:
    if hasattr(const, 'co_code'):
        consts.append(_with_clean_bytecode(const, const.co_consts))
    else:
        consts.append(const)
mode = _with_clean_bytecode(code, tuple(consts))

# Write the result next to the input; the context manager makes sure the file
# is flushed and closed.
with open(sys.argv[1]+".mod", 'wb') as f:
    f.write(header)
    marshal.dump(mode, f)
import marshal, sys, opcode, types, dis
import opcode
def getopcode(opname):
    """Return the numeric opcode for the mnemonic ``opname``.

    The number is the position of ``opname`` in ``opcode.opname``, i.e. it
    comes from the opcode table of the interpreter running this script.

    Raises:
        ValueError: if ``opname`` is not present in ``opcode.opname``.
    """
    return opcode.opname.index(opname)
# Opcode numbers are resolved by name through getopcode(), so they come from
# the opcode table of the interpreter running this script.
# NOTE(review): a lookup raises ValueError at import time when the running
# interpreter has no opcode of that name -- presumably the script is meant to
# run on the same Python version as the target pyc; confirm.
NOP = getopcode('NOP')  # filler byte deconf() writes over unreached offsets
# HAVE_ARGUMENT = getopcode('HAVE_ARGUMENT') # py2.7
JUMP_FORWARD = getopcode('JUMP_FORWARD')  # operand is relative to the next instruction
JUMP_IF_FALSE_OR_POP = getopcode('JUMP_IF_FALSE_OR_POP')  # operand used as absolute target
JUMP_IF_TRUE_OR_POP = getopcode('JUMP_IF_TRUE_OR_POP')  # operand used as absolute target
JUMP_ABSOLUTE = getopcode('JUMP_ABSOLUTE')  # operand used as absolute target, no fall-through
POP_JUMP_IF_FALSE = getopcode('POP_JUMP_IF_FALSE')  # operand used as absolute target
POP_JUMP_IF_TRUE = getopcode('POP_JUMP_IF_TRUE')  # operand used as absolute target
EXTENDED_ARG = getopcode('EXTENDED_ARG')  # prefix handled separately by deconf_inner()
# CONTINUE_LOOP = getopcode('CONTINUE_LOOP') # py2.7
FOR_ITER = getopcode('FOR_ITER')  # operand is relative to the next instruction
RETURN_VALUE = getopcode('RETURN_VALUE')  # terminates the path being walked
# Byte offsets reached by deconf_inner(); deconf() replaces it with a fresh
# set before walking each code object.
used_set = set()
def deconf_inner(code, now):
    """Mark every bytecode byte reachable from offset ``now`` in ``used_set``.

    Instructions are decoded as 2-byte units (opcode, argument).  Any run of
    ``EXTENDED_ARG`` prefixes is folded into the argument of the instruction
    that follows it, and only then is that instruction's own opcode
    dispatched:

    * ``JUMP_FORWARD`` / ``JUMP_ABSOLUTE`` are followed without fall-through;
    * conditional jumps and ``FOR_ITER`` explore the jump target first and
      the fall-through afterwards;
    * everything else falls through to the next instruction.

    A path ends at ``RETURN_VALUE``, at an offset that is already marked, or
    when it leaves the bytecode.

    Args:
        code: sequence of integer byte values of one code object.
        now: byte offset at which to start walking.

    Returns:
        None.  Reached offsets are added to the module-level ``used_set``.
    """
    global used_set
    size = len(code)
    # Opcodes whose argument is an absolute target and that can also fall
    # through to the next instruction.
    conditional_jumps = (JUMP_IF_TRUE_OR_POP, JUMP_IF_FALSE_OR_POP,
                         POP_JUMP_IF_TRUE, POP_JUMP_IF_FALSE)
    # A JUMP_FORWARD with argument 0, 2 or 4 is generally a junk instruction
    # (1 is treated the same way).
    junk_jump_args = (0, 1, 2, 4)
    # Explicit work list instead of one recursive call per branch, so heavily
    # obfuscated functions cannot exhaust the recursion limit.  The
    # fall-through is pushed and the target walked first, which keeps the
    # same depth-first order as the recursive formulation.
    pending = [now]
    while pending:
        now = pending.pop()
        # ``now + 1`` must exist so the argument byte can be read; a path
        # that leaves the bytecode simply ends instead of raising IndexError.
        while 0 <= now < size - 1 and code[now] != RETURN_VALUE:
            if now in used_set:
                break
            start = now
            op = code[now]
            arg = code[now + 1]
            used_set.add(now)
            used_set.add(now + 1)
            # Fold EXTENDED_ARG prefixes: each one contributes the next more
            # significant byte of the final argument.
            while op == EXTENDED_ARG and now + 3 < size:
                now += 2
                op = code[now]
                arg = arg << 8 | code[now + 1]
                used_set.add(now)
                used_set.add(now + 1)
            following = now + 2
            if op == RETURN_VALUE:
                # Prefixed RETURN_VALUE: already marked, the path ends here.
                now = size
                break
            if op == JUMP_FORWARD:
                target = following + arg
                # Junk heuristic (unprefixed jumps only): a tiny forward jump,
                # or one that lands on another JUMP_FORWARD, is unmarked so
                # that it gets overwritten with NOP together with the bytes
                # it skips.
                # NOTE(review): compilers also emit short forward jumps (e.g.
                # over a small else-branch) -- confirm this is acceptable for
                # the sample being cleaned.
                if start == now and (
                        arg in junk_jump_args
                        or (0 <= target < size and code[target] == JUMP_FORWARD)):
                    used_set.discard(start)
                    used_set.discard(start + 1)
                now = target
            elif op == JUMP_ABSOLUTE:
                now = arg
            elif op in conditional_jumps:
                pending.append(following)
                now = arg
            elif op == FOR_ITER:
                pending.append(following)
                now = following + arg
            else:
                now = following
        # Mark the opcode the path stopped on (normally RETURN_VALUE).
        if 0 <= now < size:
            used_set.add(now)
def deconf(code):
    """Return ``code`` with every byte that is never reached replaced by NOP.

    The bytecode is copied into a list of byte values, walked from offset 0
    with ``deconf_inner`` and rebuilt with ``NOP`` written over each offset
    that did not end up in ``used_set``.  The length is unchanged, so byte
    offsets in the result line up with the input.

    Args:
        code: bytecode of one code object (a bytes-like object).

    Returns:
        The patched bytecode as ``bytes`` of the same length.
    """
    global used_set
    used_set = set()  # Remember to clean up used_set for every target function
    cod = list(code)
    deconf_inner(cod, 0)
    return bytes(value if offset in used_set else NOP
                 for offset, value in enumerate(cod))
filename = 'PYC.pyc'
# Read the pyc: the first 16 bytes are kept verbatim as the header, the rest
# is the marshalled top-level code object.
with open(filename, 'rb') as f:
    header = f.read(16)
    code = marshal.load(f)
print(code.co_consts)
# Debug aid: disassemble one cleaned nested code object.
# print(dis.dis(deconf(code.co_consts[3].co_code)))

# Clean the code objects stored directly in the module's constants; nested
# code objects deeper than that are left as they are.  CodeType.replace()
# copies every other field from the object being patched, so each nested
# function keeps its own positional-only / keyword-only argument counts
# (building the object by hand used the module's counts for them).
consts = list()
for const in code.co_consts:
    if hasattr(const, 'co_code'):
        consts.append(const.replace(co_code=deconf(const.co_code)))
    else:
        consts.append(const)
# deconf() overwrites bytes in place without changing the length, so the
# line-number table is carried over unchanged.
mode = code.replace(co_code=deconf(code.co_code), co_consts=tuple(consts))

# Write the result next to the input; the context manager makes sure the file
# is flushed and closed.
with open(filename + ".mod", 'wb') as f:
    f.write(header)
    marshal.dump(mode, f)