https://www.cnblogs.com/lordtianqiyi/articles/16822639.html

// Run the ChaCha block function over one 16-word state.
//
//   in          16-word input state; only read here, never modified.
//   out         64-byte buffer filled by chacha20_serialize() with the result.
//   num_rounds  number of rounds, normally 20. Rounds are consumed two at a
//               time, so an odd count behaves like the next even count and a
//               count <= 0 performs no rounds at all.
static void chacha20_block(uint32_t in[16], uint8_t out[64], int num_rounds) {
    uint32_t work[16];
    int rounds_left;
    int w;

    // Operate on a private copy so the caller's state is left untouched.
    for (w = 0; w < 16; w++) {
        work[w] = in[w];
    }

    rounds_left = num_rounds;
    while (rounds_left > 0) {
        // First round of the pair: viewing the 16 words as a 4x4 row-major
        // matrix, each quarter round mixes one column.
        chacha20_quarterround(work, 0, 4,  8, 12);
        chacha20_quarterround(work, 1, 5,  9, 13);
        chacha20_quarterround(work, 2, 6, 10, 14);
        chacha20_quarterround(work, 3, 7, 11, 15);

        // Second round of the pair: each quarter round mixes one
        // (wrapping) diagonal of the same matrix.
        chacha20_quarterround(work, 0, 5, 10, 15);
        chacha20_quarterround(work, 1, 6, 11, 12);
        chacha20_quarterround(work, 2, 7,  8, 13);
        chacha20_quarterround(work, 3, 4,  9, 14);

        rounds_left -= 2;
    }

    // Add the original input words back into the mixed state
    // (unsigned 32-bit addition, wrapping on overflow).
    for (w = 0; w < 16; w++) {
        work[w] += in[w];
    }

    chacha20_serialize(work, out);
}
 
// ChaCha quarter round, applied in place to the four words of x selected by
// the indices a, b, c and d. It performs four add / xor / rotate-left steps;
// the rotation amounts passed to rotl32() are 16, 12, 8 and 7 in that order.
static void chacha20_quarterround(uint32_t *x, int a, int b, int c, int d) {
    uint32_t *wa = &x[a];
    uint32_t *wb = &x[b];
    uint32_t *wc = &x[c];
    uint32_t *wd = &x[d];

    *wa += *wb;
    *wd = rotl32(*wd ^ *wa, 16);

    *wc += *wd;
    *wb = rotl32(*wb ^ *wc, 12);

    *wa += *wb;
    *wd = rotl32(*wd ^ *wa,  8);

    *wc += *wd;
    *wb = rotl32(*wb ^ *wc,  7);
}

调用:

// Example invocation data.
// 32-byte key: the consecutive byte values 0x00 through 0x1f.
uint8_t key[] = {
    0x00, 0x01, 0x02, 0x03,
    0x04, 0x05, 0x06, 0x07,
    0x08, 0x09, 0x0a, 0x0b,
    0x0c, 0x0d, 0x0e, 0x0f,
    0x10, 0x11, 0x12, 0x13,
    0x14, 0x15, 0x16, 0x17,
    0x18, 0x19, 0x1a, 0x1b,
    0x1c, 0x1d, 0x1e, 0x1f
};
 
uint8_t nonce[] = {                // 12-byte nonce (random number)
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x4a, 0x00, 0x00, 0x00, 0x00
};
 
// 114-byte plaintext. Read as ASCII the bytes spell: "Ladies and Gentlemen
// of the class of '99: If I could offer you only one tip for the future,
// sunscreen would be it."
uint8_t input[114] = {
    0x4c, 0x61, 0x64, 0x69, 0x65, 0x73, 0x20, 0x61, 0x6e, 0x64, 0x20, 0x47, 0x65, 0x6e, 0x74, 0x6c,
    0x65, 0x6d, 0x65, 0x6e, 0x20, 0x6f, 0x66, 0x20, 0x74, 0x68, 0x65, 0x20, 0x63, 0x6c, 0x61, 0x73,
    0x73, 0x20, 0x6f, 0x66, 0x20, 0x27, 0x39, 0x39, 0x3a, 0x20, 0x49, 0x66, 0x20, 0x49, 0x20, 0x63,
    0x6f, 0x75, 0x6c, 0x64, 0x20, 0x6f, 0x66, 0x66, 0x65, 0x72, 0x20, 0x79, 0x6f, 0x75, 0x20, 0x6f,
    0x6e, 0x6c, 0x79, 0x20, 0x6f, 0x6e, 0x65, 0x20, 0x74, 0x69, 0x70, 0x20, 0x66, 0x6f, 0x72, 0x20,
    0x74, 0x68, 0x65, 0x20, 0x66, 0x75, 0x74, 0x75, 0x72, 0x65, 0x2c, 0x20, 0x73, 0x75, 0x6e, 0x73,
    0x63, 0x72, 0x65, 0x65, 0x6e, 0x20, 0x77, 0x6f, 0x75, 0x6c, 0x64, 0x20, 0x62, 0x65, 0x20, 0x69,
    0x74, 0x2e
};
 
// Output buffers, each the same length as the input. Only `encrypt` is
// written by the call below; `decrypt` is declared but not used in this
// snippet.
uint8_t encrypt[114];
uint8_t decrypt[114];
 
// NOTE(review): this call is a statement and must live inside a function
// body (e.g. main) to compile; it is shown at file scope for illustration.
ChaCha20XOR(key, 1, nonce, input, encrypt, 114);                // the second argument, 1, is the initial block counter
 
// XOR `inlen` bytes of `in` with the ChaCha20 keystream and store the result
// in `out`. Because the operation is a plain XOR with the keystream, the same
// call both encrypts and decrypts.
//
//   key      32-byte key, passed through to chacha20_init_state().
//   counter  initial block counter, passed through to chacha20_init_state().
//   nonce    12-byte nonce, passed through to chacha20_init_state().
//   in       input bytes; at least `inlen` bytes must be readable.
//   out      output bytes; at least `inlen` bytes must be writable. Each
//            out[k] depends only on in[k], so `out` may be the same buffer
//            as `in`.
//   inlen    number of bytes to process; zero or a negative value processes
//            nothing.
//
// The input is consumed in chunks of at most 64 bytes, one keystream block
// per chunk. The chunk length is clamped to the bytes that remain, so no
// index is ever formed beyond `inlen`; this keeps the signed arithmetic free
// of overflow even when `inlen` is close to INT_MAX.
void ChaCha20XOR(uint8_t key[32], uint32_t counter, uint8_t nonce[12], uint8_t *in, uint8_t *out, int inlen) {
    uint32_t s[16];
    uint8_t block[64];
    int offset = 0;

    chacha20_init_state(s, key, counter, nonce);

    while (offset < inlen) {
        int remaining = inlen - offset;   // > 0 here, cannot overflow
        int chunk = remaining < (int)sizeof block ? remaining : (int)sizeof block;
        int k;

        // Generate the keystream block for this chunk, then advance state
        // word 12 (presumably the block-counter slot set up by
        // chacha20_init_state -- confirm against its definition) so the next
        // chunk uses a fresh block.
        chacha20_block(s, block, 20);
        s[12]++;

        for (k = 0; k < chunk; k++) {
            out[offset + k] = in[offset + k] ^ block[k];
        }

        offset += chunk;   // offset <= inlen always holds
    }
}
 

1/4 轮操作

在 ChaCha20 算法当中, 一个基础的操作即为 1/4 轮运算, 它主要操作 4 个 32 位的无符号整数,具体操作如下:

a += b; d ^= a; d <<<= 16;
c += d; b ^= c; b <<<= 12;
a += b; d ^= a; d <<<= 8;
c += d; b ^= c; b <<<= 7;

初始化矩阵

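矩阵的输入为一个 256 位的密钥、32 位的计数器值、96 位的随机数(nonce)以及 4×32 位的常数,它们均填充在 32 位整型数组(共 16 个字)中作为初始矩阵,如下所示:

图:ChaCha20 初始化矩阵(原图缺失,以下按 RFC 8439 的布局用文字表示,每格为一个 32 位字)

    常数    常数    常数    常数        (第 0–3 字)
    密钥    密钥    密钥    密钥        (第 4–7 字)
    密钥    密钥    密钥    密钥        (第 8–11 字)
    计数器  随机数  随机数  随机数      (第 12–15 字)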

块函数 (ChaCha20 Block Function)

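这个块函数的输入是之前所生成的状态矩阵,最终输出 64 字节(512 位)“随机化”的数据。具体操作对应上文的 chacha20_block 函数:先复制一份状态,对副本执行指定轮数(通常为 20 轮,每次循环包含一次列轮和一次对角轮)的 1/4 轮运算,再把结果与原始状态逐字相加,最后序列化为 64 字节输出。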

到这里, ChaCha20 的基本原理就结束了, 整个密码结构并不是很复杂, 整体思路也比较清晰。