修改 sdk 里的 ida_func.py
import idc
import idaapi
import ida_ida
import ida_idaapi
import idautils
import re
class FuncSigFeature:
def __init__(self):
self.file_path = idc.get_input_file_path()
self.code_list = ["",".text",".plt",".got","extern",".pdata",".bss"]
self.control_ins_list = ["call","jc","jnc","jz","jnz","js","jns","jo","jno","jp",
"jpe","jnp","jpo","ja","jnbe","jae","jnb","jb","jnae","jbe",
"jna","je","jne","jg","jnle","jge","jnl","jl","jnge","jle","jng"]
self.string_list = dict()
for s in idautils.Strings():
self.string_list[str(s)] = s.ea
def get_file_structure(self):
arch = ida_ida.inf_get_procname()
if ida_ida.inf_is_be():
endian = "MSB"
else:
endian = "LSB"
return arch, endian
def get_file_type(self):
file_format = ""
file_type = ""
if ida_ida.inf_is_64bit():
file_format = "64"
elif ida_ida.inf_is_32bit():
file_format = "32"
ft = ida_ida.inf_get_filetype()
if ft == idaapi.f_PE:
file_type = "PE"
elif ft == idaapi.f_ELF:
file_type = "ELF"
return file_format, file_type
def get_module_info(self):
module_info = ""
if len(idc.ARGV) == 2:
module_info = idc.ARGV[1]
return module_info
def byte2str(self, l):
if "bytes" in str(type(l)):
l = l.decode()
return l
def extract_const(self, ins_addr):
const_str = ""
op_str = idc.print_insn_mnem(ins_addr)
if op_str not in self.control_ins_list:
for i in range(2):
operand_type = idc.get_operand_type(ins_addr, i)
if operand_type == idc.o_mem:
const_addr = idc.get_operand_value(ins_addr, i)
if idc.get_segm_name(const_addr) not in self.code_list:
str_const = idc.get_strlit_contents(const_addr)
if str_const:
str_const = self.byte2str(str_const)
if (str_const in self.string_list) and (const_addr == self.string_list[str_const]):
const_str += str_const
break
return const_str
def get_ins_feature(self, start_ea):
ins_str_list = list()
ins_bytes_list = list()
ins_list = list(idautils.FuncItems(start_ea))
for ins_addr in ins_list:
ins_bytes = idc.get_bytes(ins_addr, idc.get_item_size(ins_addr))
ins_bytes_list.append(ins_bytes)
ins_str = self.extract_const(ins_addr)
ins_str_list.append(ins_str)
return ins_bytes_list, ins_str_list
def filter_segment(self, func_addr):
ignore_list = ["extern",".plt",".got",".idata"]
if idc.get_segm_name(func_addr) in ignore_list:
return True
else:
return False
def get_func_feature(ea):
content = dict()
pfn = idaapi.get_func(ea)
if pfn:
func_addr = pfn.start_ea
Func = FuncSigFeature()
if Func.filter_segment(func_addr):
return None
arch, endian = Func.get_file_structure()
file_format, file_type = Func.get_file_type()
module_info = Func.get_module_info()
ins_bytes_list, ins_str_list = Func.get_ins_feature(func_addr)
content["extmsg"] = [arch, endian, file_format, file_type, module_info]
content["ins_bytes"] = ins_bytes_list
content["ins_str"] = ins_str_list
content["func_name"] = idaapi.get_func_name(func_addr)
return content
else:
return None
怎么感觉识别率好差啊,也太菜了吧