NOTE

以字节流(bytes 类型)为核心枢纽,所有数据类型转换都围绕字节流进行

能用 vscode 编辑器解决的直接解决是最快的,其次是 cyberchef,数据量大或者在脚本中间生成的才用 python 脚本

数字

(int).to_bytes / int.from_bytes

# Signed integer -> big-endian bytes
signed_num = -100
bytes_be = signed_num.to_bytes(4, byteorder='big', signed=True)  # b'\xff\xff\xff\x9c'

# Bytes -> integer (signed=True makes the result signed)
data = b'\xff\xff'
num = int.from_bytes(data, byteorder='little', signed=True)  # -1

数组

用 struct 库转换

import struct
 
def bytes_to_array_struct(data, element_size, signed=False, endian='little'):
    """Decode a byte string into a list of fixed-width integers via ``struct``.

    :param data: bytes-like input. Its length must be an exact multiple of
        ``element_size``; otherwise ``struct.unpack`` raises ``struct.error``.
    :param element_size: width of every element in bytes (1, 2, 4 or 8).
    :param signed: decode the elements as signed integers when true.
    :param endian: ``'little'`` selects little-endian; any other value is
        treated as big-endian.
    :return: list of the decoded integers.
    :raises KeyError: if ``(element_size, signed)`` is not a supported pair.
    """
    # Each string is (unsigned code, signed code); indexing with the bool
    # flag picks the matching struct type code.
    width_codes = {1: 'Bb', 2: 'Hh', 4: 'Ii', 8: 'Qq'}
    lookup = {
        (width, flag): pair[flag]
        for width, pair in width_codes.items()
        for flag in (False, True)
    }
    type_code = lookup[(element_size, signed)]

    if endian == 'little':
        order = '<'
    else:
        order = '>'

    n_items = len(data) // element_size
    layout = f'{order}{n_items}{type_code}'
    return list(struct.unpack(layout, data))
 
# Example: 4-byte little-endian signed integers
data = b'\xff\xff\xff\xff\xfe\xff\xff\xff'
arr = bytes_to_array_struct(data, 4, signed=True)  # [-1, -2]
 
 
def array_to_bytes_struct(arr, element_size, signed=False, endian='little'):
    """Encode a sequence of integers as fixed-width values via ``struct``.

    :param arr: sequence of integers to encode (its ``len`` is used).
    :param element_size: width of every element in bytes (1, 2, 4 or 8).
    :param signed: encode the elements as signed integers when true.
    :param endian: ``'little'`` selects little-endian; any other value is
        treated as big-endian.
    :return: the packed ``bytes``.
    :raises KeyError: if ``(element_size, signed)`` is not a supported pair.
    """
    # Each string is (unsigned code, signed code); indexing with the bool
    # flag picks the matching struct type code.
    width_codes = {1: 'Bb', 2: 'Hh', 4: 'Ii', 8: 'Qq'}
    lookup = {
        (width, flag): pair[flag]
        for width, pair in width_codes.items()
        for flag in (False, True)
    }
    type_code = lookup[(element_size, signed)]

    if endian == 'little':
        order = '<'
    else:
        order = '>'

    layout = f'{order}{len(arr)}{type_code}'
    return struct.pack(layout, *arr)
 
# Example: 4-byte big-endian unsigned integers
arr = [0x12345678, 0x9abcdef0]
bytes_data = array_to_bytes_struct(arr, 4, endian='big')  # b'\x12\x34\x56\x78\x9a\xbc\xde\xf0'

或者用更慢的循环

def bytes_to_array_generator(data, element_size, signed=False, endian='little'):
    """Lazily decode ``data`` into integers, one ``element_size`` chunk at a time.

    :param data: sliceable bytes-like input.
    :param element_size: step and chunk width in bytes.
    :param signed: passed to ``int.from_bytes`` as ``signed``.
    :param endian: byte order passed to ``int.from_bytes``.
    :return: a generator yielding one integer per chunk. If the length of
        ``data`` is not a multiple of ``element_size``, the final value is
        decoded from the shorter chunk that remains.
    """
    # The offsets are computed up front so an invalid step fails at call
    # time, not when the generator is first advanced.
    offsets = range(0, len(data), element_size)

    def _decode():
        for start in offsets:
            chunk = data[start:start + element_size]
            yield int.from_bytes(chunk, endian, signed=signed)

    return _decode()
 
# Materialize as a list (fine for small inputs)
data = b'\x01\x00\x00\x00\x02\x00\x00\x00'
arr = list(bytes_to_array_generator(data, 4))  # [1, 2]
 
def array_to_bytes_chain(arr, element_size, signed=False, endian='little'):
    """Encode integers of one common width and concatenate the results.

    :param arr: iterable of integers.
    :param element_size: number of bytes produced for every element.
    :param signed: passed to ``int.to_bytes`` as ``signed``.
    :param endian: byte order passed to ``int.to_bytes``.
    :return: the concatenated ``bytes``.
    """
    pieces = []
    for value in arr:
        pieces.append(value.to_bytes(element_size, endian, signed=signed))
    return b''.join(pieces)

字符字符串

(str).encode / (bytes).decode

# Encode with an explicit codec
s = "CTF{flag}"
utf16_bytes = s.encode('utf-16-le')  # UTF-16 with little-endian byte order

# Recover the string from the bytes (watch out for a BOM in the input)
recovered = utf16_bytes.decode('utf-16-le')

数字字符串

十六进制字符串

处理成 bytes.fromhex 可以处理的样子。生成则用 format 格式

def hex_str_to_bytes(hex_str):
    r"""Convert a hex string with optional separators and prefixes to bytes.

    Whitespace, commas and ``0x``/``0X`` prefixes all act as token
    delimiters. Every token is left-padded with a zero to an even number of
    digits, so single-digit values such as the output of ``hex()`` decode to
    the byte they stand for instead of being merged with their neighbour.

    Examples:
        "01 3e 2f"        -> b'\x01\x3e\x2f'
        "0x01,0x3e,0x2f"  -> b'\x01\x3e\x2f'
        "0x1, 0x3e, 0x2f" -> b'\x01\x3e\x2f'

    :param hex_str: text containing hexadecimal digits.
    :return: the decoded ``bytes`` (empty for an empty string).
    :raises ValueError: if a token contains a non-hexadecimal character.
    """
    # Local import: this snippet sits above the file's own ``import re``.
    import re

    padded = []
    for token in re.split(r'(?:0x|[\s,])+', hex_str.lower()):
        if not token:
            continue
        # An odd-length token is a value written without its leading zero.
        if len(token) % 2:
            token = '0' + token
        padded.append(token)
    return bytes.fromhex(''.join(padded))
    
def bytes_to_pretty_hex(byte_data, prefix="0x", sep=", "):
    r"""Render bytes as a delimited list of two-digit lowercase hex values.

    Example: b'\x01\x3e\x2f' -> "0x01, 0x3e, 0x2f"

    :param byte_data: iterable of integers, typically ``bytes``.
    :param prefix: text placed in front of every value.
    :param sep: text placed between consecutive values.
    :return: the formatted string (empty for empty input).
    """
    rendered = []
    for value in byte_data:
        rendered.append("{}{:02x}".format(prefix, value))
    return sep.join(rendered)

其他进制字符串

用正则分割,用 format 格式生成

import re
 
def parse_mixed_base_str(s, sep='[ ,]+'):
    """Parse a string of byte values written in mixed bases into bytes.

    Example: "0b1100001 0o141 0x61 97" -> b'aaaa'

    Each token is read as hexadecimal (``0x``), binary (``0b``), octal
    (``0o``) or decimal (no prefix). Prefixes are matched case-insensitively,
    so ``0X61`` is accepted as well. A token that starts with a bare ``0``
    and has more than one character (for example ``0223``) is rejected
    rather than guessed to be octal.

    :param s: the text to parse.
    :param sep: regular expression that separates tokens.
    :return: one byte per token, in order.
    :raises ValueError: if a token cannot be parsed, uses a bare leading
        zero, or falls outside 0-255.
    """
    prefix_bases = {'0x': 16, '0b': 2, '0o': 8}
    values = []
    # Split on every separator run; empty pieces are skipped below.
    for num_str in re.split(sep, s.strip()):
        if not num_str:
            continue

        try:
            prefix = num_str[:2].lower()
            if prefix in prefix_bases:
                base = prefix_bases[prefix]
            elif num_str.startswith('0') and len(num_str) > 1:
                # A bare leading zero that is not 0x/0b/0o is treated as invalid.
                raise ValueError(f"非法八进制格式: {num_str}")
            else:
                base = 10

            # int() accepts the matching prefix itself, in either case.
            num = int(num_str, base)
            if not 0 <= num <= 0xff:
                raise ValueError(f"数值 {num} 超出字节范围(0-255)")

            values.append(num)

        except ValueError as e:
            # Re-raise with the offending token; the original chain is dropped.
            raise ValueError(f"解析错误 '{num_str}': {e}") from None

    return bytes(values)
 
# Example usage
mixed_str = "0b01100001 0o141 0x61 97"
# Every token decodes to 0x61, i.e. b'a'
print(parse_mixed_base_str(mixed_str))  # b'aaaa'
# A token with a bare leading zero such as "0223" is not read as octal:
# parse_mixed_base_str raises ValueError for it.
def bytes_to_custom_base(data, base=16, prefix='', sep=' ', zero_pad=0):
    """Render each byte of ``data`` as a number in the given base.

    :param data: iterable of integers, typically ``bytes``.
    :param base: one of 2, 8, 10 or 16.
    :param prefix: text placed in front of every value, e.g. '0x' or '0b'.
    :param sep: text placed between consecutive values.
    :param zero_pad: minimum number of digits, zero-filled on the left. When
        0 the default width is used: 8 for binary, 3 for octal, 2 for hex
        and no padding for decimal.
    :return: the formatted string (empty for empty input).
    :raises ValueError: if ``base`` is not supported.
    """
    base_info = {
        2: {'fmt': 'b', 'digits': 8},
        8: {'fmt': 'o', 'digits': 3},
        # Decimal is unpadded by default; an explicit zero_pad still applies.
        10: {'fmt': 'd', 'digits': 0},
        16: {'fmt': 'x', 'digits': 2},
    }

    if base not in base_info:
        raise ValueError("不支持的进制,可选:2/8/10/16")

    fmt_char = base_info[base]['fmt']
    digits = zero_pad or base_info[base]['digits']

    # zfill(0) leaves the text unchanged, which covers the unpadded decimal case.
    return sep.join(
        f"{prefix}{format(b, fmt_char).zfill(digits)}" for b in data
    )
 
# Examples
data = b'abc'
print(bytes_to_custom_base(data, 2, '0b', ' '))  
# 0b01100001 0b01100010 0b01100011
print(bytes_to_custom_base(data, 8, '0o', ','))  
# 0o141,0o142,0o143
print(bytes_to_custom_base(data, 10, '', ' '))   
# 97 98 99

动态类型解析

def parse_dynamic_array(data, type_sequence):
    """Decode consecutive little-endian fields from the start of ``data``.

    :param data: bytes-like input holding the packed fields.
    :param type_sequence: list of ``(name, struct_format)`` pairs, e.g. [
        ('num', 'I'),
        ('flag', '4s'),
        ('timestamp', 'Q')
    ]
        Only the first value of each format is kept, so every format should
        describe a single value.
    :return: ``(fields, rest)`` where ``fields`` maps each name to its
        decoded value and ``rest`` is the undecoded tail of ``data``.
    :raises struct.error: if ``data`` is too short for a field.
    """
    offset = 0
    result = {}
    for name, fmt in type_sequence:
        # Size and decode must use the same '<' layout: a bare format is
        # measured with native size and alignment, which can differ from
        # the standard sizes used when unpacking.
        field = struct.Struct(f'<{fmt}')
        value = field.unpack_from(data, offset)[0]
        # String fields: decode and drop trailing NUL padding.
        if 's' in fmt:
            value = value.decode('latin-1').rstrip('\x00')
        result[name] = value
        offset += field.size
    return result, data[offset:]
 
# Example: parse a small protocol record
protocol_data = b'\x2a\x00\x00\x00FLAG\x00\x00\xef\xcd\xab\x90\x78\x56\x34\x12'
# The tag is NUL-padded to 6 bytes, so it is declared as '6s'. With '4s' the
# checksum would start two bytes early and decode to a different value.
types = [('id', 'I'), ('tag', '6s'), ('checksum', 'Q')]
parsed, remaining = parse_dynamic_array(protocol_data, types)
# parsed == {'id': 42, 'tag': 'FLAG', 'checksum': 1311768467294899695}
# remaining == b''