NOTE
以字节流(bytes 类型)为核心枢纽,所有数据类型转换都围绕字节流进行
能用 vscode 编辑器解决的直接解决是最快的,其次是 cyberchef,数据量大或者在脚本中间生成的才用 python 脚本
数字
用 (int).to_bytes 和 int.from_bytes 在整数与字节流之间互相转换
# Signed integer -> bytes: 4 bytes wide, big-endian, two's complement.
signed_num = -100
bytes_be = signed_num.to_bytes(length=4, byteorder='big', signed=True)

# Bytes -> integer: signed=True makes the top bit act as the sign bit.
data = b'\xff\xff'
num = int.from_bytes(data, byteorder='little', signed=True)  # -1
数组
用 struct 库转换
import struct
def bytes_to_array_struct(data, element_size, signed=False, endian='little'):
    """Decode a byte string into a list of fixed-width integers via struct.

    :param data: bytes-like object holding the packed integers.
    :param element_size: width of one element in bytes (1, 2, 4 or 8).
    :param signed: decode elements as two's-complement signed values.
    :param endian: 'little' selects little-endian; any other value selects
        big-endian.
    :return: list of ints, one per complete element found in ``data``.
        Trailing bytes that do not fill a whole element are ignored.
    :raises KeyError: if ``element_size`` is not one of 1, 2, 4, 8.
    """
    fmt_char = {
        (1, False): 'B', (1, True): 'b',
        (2, False): 'H', (2, True): 'h',
        (4, False): 'I', (4, True): 'i',
        (8, False): 'Q', (8, True): 'q',
    }[(element_size, signed)]
    endian_char = '<' if endian == 'little' else '>'
    count = len(data) // element_size
    # unpack_from only needs the buffer to be *at least* as long as the
    # format, so a leftover tail shorter than one element no longer raises
    # struct.error the way struct.unpack did.
    return list(struct.unpack_from(f'{endian_char}{count}{fmt_char}', data))
# Example: two 4-byte little-endian signed integers.
data = b'\xff\xff\xff\xff\xfe\xff\xff\xff'
arr = bytes_to_array_struct(data, element_size=4, signed=True)  # [-1, -2]
def array_to_bytes_struct(arr, element_size, signed=False, endian='little'):
    """Pack an iterable of integers into a byte string via struct.

    :param arr: iterable of ints (list, tuple, generator, ...).
    :param element_size: width of one element in bytes (1, 2, 4 or 8).
    :param signed: encode elements as two's-complement signed values.
    :param endian: 'little' selects little-endian; any other value selects
        big-endian.
    :return: bytes of length ``element_size * number_of_elements``.
    :raises KeyError: if ``element_size`` is not one of 1, 2, 4, 8.
    :raises struct.error: if a value does not fit the chosen element type.
    """
    fmt_char = {
        (1, False): 'B', (1, True): 'b',
        (2, False): 'H', (2, True): 'h',
        (4, False): 'I', (4, True): 'i',
        (8, False): 'Q', (8, True): 'q',
    }[(element_size, signed)]
    endian_char = '<' if endian == 'little' else '>'
    # Materialise once so one-shot iterables (generators) work too: the
    # element count is needed for the format string before unpacking.
    values = tuple(arr)
    return struct.pack(f'{endian_char}{len(values)}{fmt_char}', *values)
# Example: two 4-byte big-endian unsigned integers.
arr = [0x12345678, 0x9abcdef0]
bytes_data = array_to_bytes_struct(arr, element_size=4, endian='big')  # b'\x12\x34\x56\x78\x9a\xbc\xde\xf0'
或者用更慢的循环
def bytes_to_array_generator(data, element_size, signed=False, endian='little'):
    """Lazily decode a byte string into fixed-width integers.

    :param data: bytes-like object holding the packed integers.
    :param element_size: width of one element in bytes.
    :param signed: decode elements as two's-complement signed values.
    :param endian: byte order passed to ``int.from_bytes``
        ('little' or 'big').
    :return: generator yielding one int per complete element. Trailing
        bytes that do not fill a whole element are not yielded.
    """
    # Stop before a partial tail: decoding a short slice would produce a
    # value that looks like a real element but is not (for big-endian it
    # is not even the zero-extended value).
    last_start = len(data) - element_size + 1
    return (
        int.from_bytes(data[start:start + element_size], endian, signed=signed)
        for start in range(0, last_start, element_size)
    )
# Materialise the generator into a list (fine for small inputs).
data = b'\x01\x00\x00\x00\x02\x00\x00\x00'
arr = [*bytes_to_array_generator(data, 4)]  # [1, 2]
def array_to_bytes_chain(arr, element_size, signed=False, endian='little'):
    """Serialise integers one by one, all with the same width and byte order.

    :param arr: iterable of ints.
    :param element_size: width of every encoded element in bytes.
    :param signed: encode as two's-complement signed values.
    :param endian: byte order passed to ``int.to_bytes``.
    :return: the concatenated encodings as ``bytes``.
    """
    buffer = bytearray()
    for value in arr:
        buffer += value.to_bytes(element_size, endian, signed=signed)
    return bytes(buffer)
字符字符串
用 (str).encode 和 (bytes).decode 在字符串与字节流之间互相转换
# Encode with an explicit codec.
s = "CTF{flag}"
utf16_bytes = s.encode(encoding='utf-16-le')  # UTF-16 with little-endian byte order

# Decode back to str (be careful with a BOM if the bytes carry one).
recovered = utf16_bytes.decode(encoding='utf-16-le')
数字字符串
十六进制字符串
先把输入清洗成 bytes.fromhex 可以解析的形式;生成时则用 format 格式化输出。
def hex_str_to_bytes(hex_str):
    r"""Convert a hex string with assorted separators into bytes.

    Spaces (any whitespace), commas and ``0x`` prefixes are all treated as
    token separators, case-insensitively:

        "01 3e 2f"        -> b'\x01\x3e\x2f'
        "0x01,0x3e,0x2f"  -> b'\x01\x3e\x2f'
        "0x1, 0xA"        -> b'\x01\x0a'

    :param hex_str: text containing hexadecimal digits and separators.
    :return: the decoded bytes.
    :raises ValueError: if a token contains non-hexadecimal characters.
    """
    tokens = hex_str.lower().replace('0x', ' ').replace(',', ' ').split()
    # Pad each odd-length token on its own. Gluing the tokens together
    # first would merge neighbouring single-digit values ("0x1,0x2" would
    # become b'\x12' instead of b'\x01\x02').
    padded = ('0' + token if len(token) % 2 else token for token in tokens)
    return bytes.fromhex(''.join(padded))
def bytes_to_pretty_hex(byte_data, prefix="0x", sep=", "):
    r"""Render bytes as a delimited list of two-digit lowercase hex values.

        b'\x01\x3e\x2f' -> "0x01, 0x3e, 0x2f"

    :param byte_data: iterable of ints (typically ``bytes``).
    :param prefix: text placed in front of every value.
    :param sep: text placed between consecutive values.
    :return: the formatted string; empty when ``byte_data`` is empty.
    """
    template = "{}{:02x}"
    return sep.join(template.format(prefix, octet) for octet in byte_data)
其他进制字符串
用正则分割,用 format 格式生成
import re
def parse_mixed_base_str(s, sep='[ ,]+'):
    """Parse a string of numbers in mixed bases into bytes, one byte each.

        "0b1100001 0o141 0x61 97" -> b'aaaa'

    The base is chosen per token from its prefix: ``0x`` (16), ``0b`` (2),
    ``0o`` (8), matched case-insensitively; anything else is decimal. A
    multi-digit token with a leading zero and no such prefix (e.g. "0223")
    is rejected rather than guessed at.

    :param s: the text to parse.
    :param sep: regular expression that separates tokens.
    :return: the parsed values as ``bytes``.
    :raises ValueError: if a token is malformed or outside 0-255.
    """
    prefix_bases = {'0x': 16, '0b': 2, '0o': 8}
    chunks = []
    for num_str in re.split(sep, s.strip()):
        if not num_str:
            continue
        try:
            # int() itself accepts upper-case prefixes when the base
            # matches, so only the lookup needs to be case-insensitive.
            prefix = num_str[:2].lower()
            if prefix in prefix_bases:
                base = prefix_bases[prefix]
            elif num_str.startswith('0') and len(num_str) > 1:
                # Leading zero without 0x/0b/0o: ambiguous, treat as invalid.
                raise ValueError(f"非法八进制格式: {num_str}")
            else:
                base = 10
            num = int(num_str, base)
            if not 0 <= num <= 0xff:
                raise ValueError(f"数值 {num} 超出字节范围(0-255)")
            chunks.append(num.to_bytes(1, 'big'))
        except ValueError as e:
            # Re-raise with the offending token so the caller can locate it.
            raise ValueError(f"解析错误 '{num_str}': {e}") from None
    return b''.join(chunks)
# Example usage. Octal must be written with the 0o prefix: a bare leading
# zero such as "0223" is rejected by parse_mixed_base_str.
mixed_str = "0b01100001 0o141 0x61 97 0o223"
# Result: b'a' (0x61) four times, then b'\x93' (0o223 == 147)
print(parse_mixed_base_str(mixed_str))  # b'aaaa\x93'
def bytes_to_custom_base(data, base=16, prefix='', sep=' ', zero_pad=0):
    """Render each byte of ``data`` as a number in the given base.

    :param data: iterable of ints (typically ``bytes``).
    :param base: 2, 8, 10 or 16.
    :param prefix: text placed before every value, e.g. '0x' or '0b'.
    :param sep: text placed between consecutive values.
    :param zero_pad: minimum number of digits per value, zero-filled.
        When 0, the default width is 8 for binary, 3 for octal, 2 for hex
        and no padding for decimal.
    :return: the formatted string.
    :raises ValueError: if ``base`` is not supported.
    """
    base_info = {
        2: {'fmt': 'b', 'digits': 8},
        8: {'fmt': 'o', 'digits': 3},
        10: {'fmt': 'd', 'digits': 0},  # decimal is unpadded by default
        16: {'fmt': 'x', 'digits': 2},
    }
    if base not in base_info:
        raise ValueError("不支持的进制,可选:2/8/10/16")

    fmt_char = base_info[base]['fmt']
    width = zero_pad or base_info[base]['digits']
    # One format spec for every base, so an explicit zero_pad is honoured
    # for decimal as well instead of being silently dropped.
    spec = f"0{width}{fmt_char}" if width else fmt_char
    return sep.join(f"{prefix}{format(b, spec)}" for b in data)
# Examples
data = b'abc'
print(bytes_to_custom_base(data, base=2, prefix='0b', sep=' '))
# 0b01100001 0b01100010 0b01100011
print(bytes_to_custom_base(data, base=8, prefix='0o', sep=','))
# 0o141,0o142,0o143
print(bytes_to_custom_base(data, base=10, prefix='', sep=' '))
# 97 98 99
动态类型解析
def parse_dynamic_array(data, type_sequence):
    """Parse a byte string field by field, following a list of struct formats.

    Every field is read little-endian with standard sizes and no
    alignment padding.

    :param data: bytes-like object to parse.
    :param type_sequence: list of ``(name, struct_format)`` pairs, e.g. [
            ('num', 'I'),
            ('flag', '4s'),
            ('timestamp', 'Q')
        ]
        A pad-only format such as '2x' skips bytes and stores nothing.
        If a format yields several values, only the first one is kept.
    :return: ``(fields, remaining)`` where ``fields`` maps each name to its
        value and ``remaining`` is the unparsed tail of ``data``.
    :raises struct.error: if ``data`` is too short for a field.
    """
    offset = 0
    result = {}
    for name, fmt in type_sequence:
        layout = f'<{fmt}'
        values = struct.unpack_from(layout, data, offset)
        # Measure the size with the same '<' layout that was unpacked.
        # A bare calcsize(fmt) uses native sizes, which differ for codes
        # like 'l' and would make the offset drift.
        offset += struct.calcsize(layout)
        if not values:
            # Pad-only field: bytes consumed, nothing to record.
            continue
        value = values[0]
        # String fields: decode and drop trailing NUL padding.
        if 's' in fmt:
            value = value.decode('latin-1').rstrip('\x00')
        result[name] = value
    return result, data[offset:]
# Example: parse a protocol record laid out back to back as
# uint32 id, 4-byte tag, uint64 checksum (little-endian, no padding).
protocol_data = b'\x2a\x00\x00\x00FLAG\xef\xcd\xab\x90\x78\x56\x34\x12'
types = [('id', 'I'), ('tag', '4s'), ('checksum', 'Q')]
parsed, remaining = parse_dynamic_array(protocol_data, types)
# parsed == {'id': 42, 'tag': 'FLAG', 'checksum': 1311768467294899695}; remaining == b''