zhanzr21 发表于 2022-2-7 20:45

【N32G45XVL-STB_V1.1】开发板评测2: 硬件密码单元测试

国民技术的主要产品是专用加密芯片, 如U盾, 各种充值卡, 银行卡, 会员卡上的芯片. 这些芯片都会涉及到加解密, 所以通用芯片上带有硬件加密单元也就不足为奇.

根据用户手册, N32G457支持的硬件加密算法有:


算法支持 DES/3DES、 AES、 SHA1/SHA224/SHA256、 SM1、 SM3、 SM4F、 SM7、 MD5、 CRC16/CRC32、 TRNG
给对密码算法不熟的坛友简介一下子:

DES/3DES
这是对称加解密算法, 属于比较老的算法种类, 目前已经处在淘汰的边缘, 但是很多老产品还在使用. 之所以要淘汰, 是因为目前的硬件发展使得暴力破解DES的成本很低了. 所谓对称算法是相对于非对称算法来说的: 对称算法中, 加解密过程是对称的, 密钥是通用的; 非对称算法中则有私钥公钥之分. 非对称算法比对称算法要复杂, 所以手上拿的这个芯片没有相应的硬件加速单元, 这是成本和功能的平衡.
3DES是DES算法接连做三次, 以增加暴力破解的成本, 其实也只是加个补丁. 目前有更好的算法供选择, 新产品中不建议使用DES/3DES算法.

AES
刚刚说到DES/3DES目前属于被淘汰的算法, AES就是主要替代算法. 理解为DES/3DES的升级版本即可.

SHA1/SHA224/SHA256/MD5

这都是哈希算法, 也叫散列算法. SHA1和MD5属于要淘汰的算法, 新产品不建议使用, 原因也是目前硬件暴力破解的成本下降, 不安全.

SM1/SM4F/SM7

国家密码局颁布的商用密码, 都是对称算法, 用于商业金融领域, 简单理解为DES/3DES/AES的国内替代品即可.

SM3
SHA256的国内替代算法.


目前密码学界的共识是: SM系列算法安全度高于AES/SHA256系列. 但是AES/SHA256是国际通用的, 遇到的可能性更大.

另外, S * M 是 商用密码的商密二字的汉语拼音缩写. 不知道论坛发贴让不让发, 有点歧义.

这些算法都可以用软件来实现, 但是用硬件实现后性能要高一点. 比如DES-ECB用简单的数百行代码即可实现. (ECB即Electronic Codebook, 电子密码本模式, 是对称分组加密算法的一种工作模式代号.)
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Raw 8-bit byte type used by all DES routines below. */
typedef unsigned char ubyte;

/* A DES key is supplied as 8 bytes (64 bits); PC1 selects 56 of those bits. */
#define KEY_LEN 8
typedef ubyte simple_des_key_t[KEY_LEN];

/*
 * Permuted Choice 1: 56 entries, each a 1-based bit position in the
 * 64-bit key. Used by the key schedule to build the initial 56-bit state.
 */
static const ubyte PC1[] = {
    57, 49, 41, 33, 25, 17,  9,
     1, 58, 50, 42, 34, 26, 18,
    10,  2, 59, 51, 43, 35, 27,
    19, 11,  3, 60, 52, 44, 36,
    63, 55, 47, 39, 31, 23, 15,
     7, 62, 54, 46, 38, 30, 22,
    14,  6, 61, 53, 45, 37, 29,
    21, 13,  5, 28, 20, 12,  4
};

/*
 * Permuted Choice 2: 48 entries, each a 1-based bit position in the
 * 56-bit key-schedule state. Used to derive one 48-bit round key.
 */
static const ubyte PC2[] = {
    14, 17, 11, 24,  1,  5,
     3, 28, 15,  6, 21, 10,
    23, 19, 12,  4, 26,  8,
    16,  7, 27, 20, 13,  2,
    41, 52, 31, 37, 47, 55,
    30, 40, 51, 45, 33, 48,
    44, 49, 39, 56, 34, 53,
    46, 42, 50, 36, 29, 32
};

/*
 * Initial Permutation: 64 entries, each a 1-based bit position in the
 * input block. Applied to every block before the rounds.
 */
static const ubyte IP[] = {
    58, 50, 42, 34, 26, 18, 10,  2,
    60, 52, 44, 36, 28, 20, 12,  4,
    62, 54, 46, 38, 30, 22, 14,  6,
    64, 56, 48, 40, 32, 24, 16,  8,
    57, 49, 41, 33, 25, 17,  9,  1,
    59, 51, 43, 35, 27, 19, 11,  3,
    61, 53, 45, 37, 29, 21, 13,  5,
    63, 55, 47, 39, 31, 23, 15,  7
};

/*
 * Expansion table: 48 entries, each a 1-based bit position in the 32-bit
 * half block. Expands the half block to 48 bits inside the round function.
 */
static const ubyte E[] = {
    32,  1,  2,  3,  4,  5,
     4,  5,  6,  7,  8,  9,
     8,  9, 10, 11, 12, 13,
    12, 13, 14, 15, 16, 17,
    16, 17, 18, 19, 20, 21,
    20, 21, 22, 23, 24, 25,
    24, 25, 26, 27, 28, 29,
    28, 29, 30, 31, 32,  1
};

/*
 * The eight DES substitution boxes.
 *
 * Each box holds 4 rows of 16 four-bit values, stored row after row, so the
 * entry for a given (row, col) pair is S[box][row * 16 + col].
 * Both dimensions are spelled out because a two-dimensional array cannot be
 * declared with only an empty leading bracket pair.
 */
static const ubyte S[8][64] = {
    {
        14,  4, 13,  1,  2, 15, 11,  8,  3, 10,  6, 12,  5,  9,  0,  7,
         0, 15,  7,  4, 14,  2, 13,  1, 10,  6, 12, 11,  9,  5,  3,  8,
         4,  1, 14,  8, 13,  6,  2, 11, 15, 12,  9,  7,  3, 10,  5,  0,
        15, 12,  8,  2,  4,  9,  1,  7,  5, 11,  3, 14, 10,  0,  6, 13
    },
    {
        15,  1,  8, 14,  6, 11,  3,  4,  9,  7,  2, 13, 12,  0,  5, 10,
         3, 13,  4,  7, 15,  2,  8, 14, 12,  0,  1, 10,  6,  9, 11,  5,
         0, 14,  7, 11, 10,  4, 13,  1,  5,  8, 12,  6,  9,  3,  2, 15,
        13,  8, 10,  1,  3, 15,  4,  2, 11,  6,  7, 12,  0,  5, 14,  9
    },
    {
        10,  0,  9, 14,  6,  3, 15,  5,  1, 13, 12,  7, 11,  4,  2,  8,
        13,  7,  0,  9,  3,  4,  6, 10,  2,  8,  5, 14, 12, 11, 15,  1,
        13,  6,  4,  9,  8, 15,  3,  0, 11,  1,  2, 12,  5, 10, 14,  7,
         1, 10, 13,  0,  6,  9,  8,  7,  4, 15, 14,  3, 11,  5,  2, 12
    },
    {
         7, 13, 14,  3,  0,  6,  9, 10,  1,  2,  8,  5, 11, 12,  4, 15,
        13,  8, 11,  5,  6, 15,  0,  3,  4,  7,  2, 12,  1, 10, 14,  9,
        10,  6,  9,  0, 12, 11,  7, 13, 15,  1,  3, 14,  5,  2,  8,  4,
         3, 15,  0,  6, 10,  1, 13,  8,  9,  4,  5, 11, 12,  7,  2, 14
    },
    {
         2, 12,  4,  1,  7, 10, 11,  6,  8,  5,  3, 15, 13,  0, 14,  9,
        14, 11,  2, 12,  4,  7, 13,  1,  5,  0, 15, 10,  3,  9,  8,  6,
         4,  2,  1, 11, 10, 13,  7,  8, 15,  9, 12,  5,  6,  3,  0, 14,
        11,  8, 12,  7,  1, 14,  2, 13,  6, 15,  0,  9, 10,  4,  5,  3
    },
    {
        12,  1, 10, 15,  9,  2,  6,  8,  0, 13,  3,  4, 14,  7,  5, 11,
        10, 15,  4,  2,  7, 12,  9,  5,  6,  1, 13, 14,  0, 11,  3,  8,
         9, 14, 15,  5,  2,  8, 12,  3,  7,  0,  4, 10,  1, 13, 11,  6,
         4,  3,  2, 12,  9,  5, 15, 10, 11, 14,  1,  7,  6,  0,  8, 13
    },
    {
         4, 11,  2, 14, 15,  0,  8, 13,  3, 12,  9,  7,  5, 10,  6,  1,
        13,  0, 11,  7,  4,  9,  1, 10, 14,  3,  5, 12,  2, 15,  8,  6,
         1,  4, 11, 13, 12,  3,  7, 14, 10, 15,  6,  8,  0,  5,  9,  2,
         6, 11, 13,  8,  1,  4, 10,  7,  9,  5,  0, 15, 14,  2,  3, 12
    },
    {
        13,  2,  8,  4,  6, 15, 11,  1, 10,  9,  3, 14,  5,  0, 12,  7,
         1, 15, 13,  8, 10,  3,  7,  4, 12,  5,  6, 11,  0, 14,  9,  2,
         7, 11,  4,  1,  9, 12, 14,  2,  0,  6, 10, 13, 15,  3,  5,  8,
         2,  1, 14,  7,  4, 10,  8, 13, 15, 12,  9,  0,  3,  5,  6, 11
    }
};

/*
 * Permutation P: 32 entries, each a 1-based bit position in the 32-bit
 * S-box output. Applied as the last step of the round function.
 */
static const ubyte P[] = {
    16,  7, 20, 21,
    29, 12, 28, 17,
     1, 15, 23, 26,
     5, 18, 31, 10,
     2,  8, 24, 14,
    32, 27,  3,  9,
    19, 13, 30,  6,
    22, 11,  4, 25
};

/*
 * Final permutation (inverse of the initial permutation): 64 entries, each
 * a 1-based bit position in the pre-output block.
 */
static const ubyte IP2[] = {
    40,  8, 48, 16, 56, 24, 64, 32,
    39,  7, 47, 15, 55, 23, 63, 31,
    38,  6, 46, 14, 54, 22, 62, 30,
    37,  5, 45, 13, 53, 21, 61, 29,
    36,  4, 44, 12, 52, 20, 60, 28,
    35,  3, 43, 11, 51, 19, 59, 27,
    34,  2, 42, 10, 50, 18, 58, 26,
    33,  1, 41,  9, 49, 17, 57, 25
};

/* Left-rotation amount applied to each key half, one entry per round (16 rounds). */
static const ubyte SHIFTS[] = {
    1, 1, 2, 2, 2, 2, 2, 2,
    1, 2, 2, 2, 2, 2, 2, 1
};

/*
 * A byte buffer paired with its length.
 * Returned by encrypt() and decrypt(); 'data' is obtained from malloc()
 * there and must be released by the caller.
 */
typedef struct {
    ubyte *data; /* start of the byte buffer */
    int len;     /* number of meaningful bytes in 'data' */
} String;

/*
 * Map a 4-bit value to its uppercase hexadecimal digit.
 *
 * in: the nibble to convert; expected to be below 0x10
 *
 * returns: '0'..'9' or 'A'..'F' for an in-range value, 0 otherwise
 */
static char toHex(ubyte in) {
    static const char digits[] = "0123456789ABCDEF";

    return in < 0x10 ? digits[in] : 0;
}

/*
 * Render a byte array as a NUL-terminated uppercase hex string.
 *
 * ptr: the bytes to render
 * len: how many bytes to render; nothing is rendered when len <= 0
 * out: caller-provided buffer with room for at least 2*len+1 characters
 */
static void printBytes(const ubyte *ptr, int len, char *out) {
    int i;

    for (i = 0; i < len; ++i) {
        ubyte value = ptr[i];

        /* high nibble first, then low nibble */
        *out++ = toHex(value >> 4);
        *out++ = toHex(value & 0x0F);
    }
    *out = 0;
}

/*
 * Read a single bit from a byte array, numbering bits MSB-first.
 *
 * src:   the byte array to read from
 * index: zero-based bit position; bit 0 is the most significant bit of
 *        src[0], bit 8 the most significant bit of src[1], and so on
 *
 * returns: 1 if the addressed bit is set, 0 otherwise
 */
static int peekBit(const ubyte *src, int index) {
    int cell = index / 8;    /* which byte holds the bit */
    int bit = 7 - index % 8; /* shift count within that byte */

    /* index the byte: masking the pointer itself is not valid C */
    return (src[cell] & (1 << bit)) != 0;
}

/*
 * Write a single bit into a byte array, numbering bits MSB-first.
 *
 * dst:   the byte array to modify
 * index: zero-based bit position; bit 0 is the most significant bit of dst[0]
 * value: zero clears the bit, any non-zero value sets it
 */
static void pokeBit(ubyte *dst, int index, int value) {
    int cell = index / 8;    /* which byte holds the bit */
    int bit = 7 - index % 8; /* shift count within that byte */

    /* update the addressed byte; the pointer itself must not be modified */
    if (value == 0) {
        dst[cell] = (ubyte)(dst[cell] & ~(1 << bit));
    } else {
        dst[cell] = (ubyte)(dst[cell] | (1 << bit));
    }
}

/*
 * Copy a bit field and rotate it left.
 *
 * src:   the array holding the bit field to rotate
 * len:   the width of the bit field in BITS (not bytes)
 * times: how many positions to rotate left; bits leaving position 0
 *        re-enter at position len-1
 * dst:   caller-allocated array receiving the rotated field; only bits
 *        0..len-1 are written
 */
static void shiftLeft(const ubyte *src, int len, int times, ubyte *dst) {
    int i, t;

    if (len <= 0) {
        return;
    }

    /* copy exactly 'len' bits; bit index 'len' is outside the field */
    for (i = 0; i < len; ++i) {
        pokeBit(dst, i, peekBit(src, i));
    }

    /* rotate one position per iteration */
    for (t = 0; t < times; ++t) {
        int wrapped = peekBit(dst, 0);

        for (i = 1; i < len; ++i) {
            pokeBit(dst, i - 1, peekBit(dst, i));
        }
        pokeBit(dst, len - 1, wrapped);
    }
}

/*
* Calculates the sub keys to be used in processing the messages
*
* key: the array of bytes representing the key
* ks: the subkeys that have been allocated by the caller
*/
typedef ubyte subkey_t; /* 17 sets of 48 bits */
static void getSubKeys(const simple_des_key_t key, subkey_t ks) {
    ubyte c;/* 56 bits */
    ubyte d;/* 28 bits */
    ubyte kp;
    int i, j;

    /* intialize */
    memset(c, 0, sizeof(c));
    memset(d, 0, sizeof(d));
    memset(ks, 0, sizeof(subkey_t));

    /* permute 'key' using table PC1 */
    for (i = 0; i < 56; ++i) {
      pokeBit(kp, i, peekBit(key, PC1 - 1));
    }

    /* split 'kp' in half and process the resulting series of 'c' and 'd' */
    for (i = 0; i < 28; ++i) {
      pokeBit(c, i, peekBit(kp, i));
      pokeBit(d, i, peekBit(kp, i + 28));
    }

    /* shift the components of c and d */
    for (i = 1; i < 17; ++i) {
      shiftLeft(c, 28, SHIFTS, c);
      shiftLeft(d, 28, SHIFTS, d);
    }

    /* merge 'd' into 'c' */
    for (i = 1; i < 17; ++i) {
      for (j = 28; j < 56; ++j) {
            pokeBit(c, j, peekBit(d, j - 28));
      }
    }

    /* form the sub-keys and store them in 'ks'
   * permute 'c' using table PC2 */
    for (i = 1; i < 17; ++i) {
      for (j = 0; j < 48; ++j) {
            pokeBit(ks, j, peekBit(c, PC2 - 1));
      }
    }
}

/*
 * The DES round function.
 *
 * r:  the 32-bit (4-byte) half block to process
 * ks: the 48-bit (6-byte) round key for this round
 * sp: caller-allocated 4-byte output; all 32 bits are written
 */
static void f(ubyte *r, ubyte *ks, ubyte *sp) {
    ubyte er[6]; /* 48 bits: expanded half block */
    ubyte sr[4]; /* 32 bits: S-box output */
    int i;

    /* initialize */
    memset(er, 0, sizeof(er));
    memset(sr, 0, sizeof(sr));

    /* expand 'r' from 32 to 48 bits using table E */
    for (i = 0; i < 48; ++i) {
        pokeBit(er, i, peekBit(r, E[i] - 1));
    }

    /* xor 'er' with 'ks' byte by byte and store back into 'er' */
    for (i = 0; i < 6; ++i) {
        er[i] ^= ks[i];
    }

    /* process 'er' six bits at a time and store resulting four bits in 'sr' */
    for (i = 0; i < 8; ++i) {
        int base = i * 6;
        int b[6];
        int k, row, col, m;

        for (k = 0; k < 6; ++k) {
            b[k] = peekBit(er, base + k);
        }

        /* outer two bits select the row, inner four bits the column */
        row = 2 * b[0] + b[5];
        col = 8 * b[1] + 4 * b[2] + 2 * b[3] + b[4];
        m = S[i][row * 16 + col];

        /* write the 4-bit S-box value MSB-first into nibble i of 'sr' */
        for (k = 0; k < 4; ++k) {
            pokeBit(sr, i * 4 + k, (m >> (3 - k)) & 1);
        }
    }

    /* permute 'sr' using table P */
    for (i = 0; i < 32; ++i) {
        pokeBit(sp, i, peekBit(sr, P[i] - 1));
    }
}

/*
 * Run one 8-byte block through the 16 DES rounds.
 *
 * message: the 8-byte input block
 * ks:      the round keys; ks[1]..ks[16] are applied in that order, so
 *          passing them in reversed order performs decryption
 * ep:      caller-allocated 8-byte output block; it may be the same buffer
 *          as 'message' because the input is fully read before 'ep' is
 *          written
 */
static void processMessage(const ubyte *message, subkey_t ks, ubyte *ep) {
    ubyte left[17][4];  /* 32 bits per round */
    ubyte right[17][4]; /* 32 bits per round */
    ubyte mp[8];        /* 64 bits: block after the initial permutation */
    ubyte e[8];         /* 64 bits: pre-output block */
    int i, j;

    /* permute 'message' using table IP */
    for (i = 0; i < 64; ++i) {
        pokeBit(mp, i, peekBit(message, IP[i] - 1));
    }

    /* split 'mp' into the two 32-bit halves left[0] and right[0] */
    for (i = 0; i < 32; ++i) {
        pokeBit(left[0], i, peekBit(mp, i));
        pokeBit(right[0], i, peekBit(mp, i + 32));
    }

    /* L(i) = R(i-1);  R(i) = L(i-1) xor f(R(i-1), K(i)) */
    for (i = 1; i < 17; ++i) {
        ubyte fs[4]; /* 32 bits */

        memcpy(left[i], right[i - 1], 4);
        f(right[i - 1], ks[i], fs);
        for (j = 0; j < 4; ++j) {
            right[i][j] = left[i - 1][j] ^ fs[j];
        }
    }

    /* amalgamate r and l (in that order) into 'e' */
    for (i = 0; i < 32; ++i) {
        pokeBit(e, i, peekBit(right[16], i));
    }
    for (i = 32; i < 64; ++i) {
        pokeBit(e, i, peekBit(left[16], i - 32));
    }

    /* permute 'e' using table IP2 and store the result in 'ep' */
    for (i = 0; i < 64; ++i) {
        pokeBit(ep, i, peekBit(e, IP2[i] - 1));
    }
}

/*
 * Encrypts a message using DES, one independent 8-byte block at a time.
 *
 * The message is padded up to the next multiple of 8 bytes before
 * encryption: between 1 and 8 bytes are appended, each holding the number
 * of bytes appended.
 *
 * key:     the key to use to encrypt the message
 * message: the message to be encrypted
 * len:     the length of the message in bytes (must not be negative)
 *
 * returns: a pairing of dynamically allocated memory for the encoded
 *          message and its length. The caller must free() the memory after
 *          use. On a negative length, a NULL message with a non-zero
 *          length, or allocation failure, the result is { NULL, 0 }.
 */
String encrypt(const simple_des_key_t key, const ubyte *message, int len) {
    String result = { 0, 0 };
    subkey_t ks;
    ubyte padByte;
    int total;
    int i;

    if (len < 0 || (message == NULL && len > 0)) {
        return result;
    }

    getSubKeys(key, ks);

    padByte = (ubyte)(8 - len % 8);
    total = len + padByte;

    result.data = malloc((size_t)total);
    if (result.data == NULL) {
        return result;
    }
    result.len = total;

    if (len > 0) {
        memcpy(result.data, message, (size_t)len);
    }
    /* the padding starts right after the message bytes */
    memset(&result.data[len], padByte, padByte);

    /* encrypt each 8-byte block in place */
    for (i = 0; i < result.len; i += 8) {
        processMessage(&result.data[i], ks, &result.data[i]);
    }

    return result;
}

/*
* Decrypts a message using DES
*
* key: the key to use to decrypt the message
* message: the message to be decrypted
* len: the length of the message
*
* returns: a paring of dynamically allocated memory for the decoded message,
*          and the length of the decoded message.
*          the caller will need to free the memory after use.
*/
String decrypt(const simple_des_key_t key, const ubyte *message, int len) {
    String result = { 0, 0 };
    subkey_t ks;
    int i, j;
    ubyte padByte;

    getSubKeys(key, ks);
    /* reverse the subkeys */
    for (i = 1; i < 9; ++i) {
      for (j = 0; j < 6; ++j) {
            ubyte temp = ks;
            ks = ks;
            ks = temp;
      }
    }

    result.data = (ubyte*)malloc(len);
    memcpy(result.data, message, len);
    result.len = len;
    for (i = 0; i < result.len; i += 8) {
      processMessage(&result.data, ks, &result.data);
    }

    padByte = result.data;
    result.len -= padByte;
    return result;
}

/*
 * Convenience routine showing the round trip of one message: prints the
 * key, the message, the encrypted bytes and the decrypted bytes as hex.
 *
 * key:     the key used for both directions
 * message: the bytes to encrypt
 * len:     the number of bytes in 'message'
 */
void driver(const simple_des_key_t key, const ubyte *message, int len) {
    String encoded, decoded;
    /* the longest dump is the ciphertext: the message plus up to 8 pad bytes */
    int longest = (len > KEY_LEN ? len : KEY_LEN) + 8;
    char *buffer = malloc(2 * (size_t)longest + 1);

    if (buffer == NULL) {
        return;
    }

    printBytes(key, KEY_LEN, buffer);
    printf("Key   : %s\n", buffer);

    printBytes(message, len, buffer);
    printf("Message : %s\n", buffer);

    encoded = encrypt(key, message, len);
    if (encoded.data == NULL) {
        printf("Encoded : (failed)\n\n");
        free(buffer);
        return;
    }
    printBytes(encoded.data, encoded.len, buffer);
    printf("Encoded : %s\n", buffer);

    decoded = decrypt(key, encoded.data, encoded.len);
    printBytes(decoded.data, decoded.len, buffer);
    printf("Decoded : %s\n\n", buffer);

    /* release allocated memory; free(NULL) is a no-op, and a zero-length
     * result may still own a buffer, so no length check is made */
    free(encoded.data);
    encoded.data = 0;
    free(decoded.data);
    decoded.data = 0;
    free(buffer);
}

int test_soft_des_ebc(void) {
    const simple_des_key_t keys[] = {
      {0x13, 0x34, 0x57, 0x79, 0x9B, 0xBC, 0xDF, 0xF1},
      {0x0E, 0x32, 0x92, 0x32, 0xEA, 0x6D, 0x0D, 0x73},
      {0x0E, 0x32, 0x92, 0x32, 0xEA, 0x6D, 0x0D, 0x73}
    };
    const ubyte message1[] = { 0x01, 0x23, 0x45, 0x67, 0x89, 0xAB, 0xCD, 0xEF };
    const ubyte message2[] = { 0x87, 0x87, 0x87, 0x87, 0x87, 0x87, 0x87, 0x87 };
    const ubyte message3[] = { 0x59, 0x6F, 0x75, 0x72, 0x20, 0x6C, 0x69, 0x70, 0x73, 0x20, 0x61, 0x72, 0x65, 0x20, 0x73, 0x6D, 0x6F, 0x6F, 0x74, 0x68, 0x65, 0x72, 0x20, 0x74, 0x68, 0x61, 0x6E, 0x20, 0x76, 0x61, 0x73, 0x65, 0x6C, 0x69, 0x6E, 0x65, 0x0D, 0x0A };
    int len;

    len = sizeof(message1) / sizeof(ubyte);
    driver(keys, message1, len);

    len = sizeof(message2) / sizeof(ubyte);
    driver(keys, message2, len);

    len = sizeof(message3) / sizeof(ubyte);
    driver(keys, message3, len);
    return 0;
}硬件实现的上述算法, 主要是从性能和编程的易用性考虑. 官方提供的Demo中没有商密系列的例子, 如果要使用这些算法的硬件加速单元, 可以和销售/技术支持联系.

还有上面的列表中说明支持TRNG
这个是真随机数发生器, 也是密码学算法中要使用的, 密码学中使用随机数, 既需要真随机也需要伪随机. 两者区别在于真随机数使用热噪音作为种子, 伪随机数使用特定的输入作为种子. 数学理论上没有真随机数, 类似于无法实现数学中的1/3分压,分频一样. 之所以称作TRNG, 是通过了一定的密码学测试标准, 国内/国际都有相应的测试标准.
uint32_t GetPseudoRand_U32(uint32_t *rand, uint32_t wordLen,uint32_t seed);
uint32_t GetTrueRand_U32(uint32_t *rand, uint32_t wordLen);
同学们可能会说C语言的库中也有随机数函数:
void srand(unsigned int _Seed);
int rand(void);
为何不直接使用? 原因还是密码学标准: 不管国际还是国内的密码学标准, 都不会依赖某个libc的随机数实现. libc中的实现只能用于最简单的场景, 如模拟输入测试等等.


文末例子工程中hsm_test分支是硬件加速单元和几个软件算法实现的例子. 可供参考.

以后有闲暇会做软件/硬件 加密算法的benchmark. 本贴先到此为止.

例子代码下载地址点此






6552918 发表于 2022-2-7 22:25

这块是国民的优势,应该好好发挥一下

麻花油条 发表于 2022-2-8 11:04

优势就应该好好发挥

单片小菜 发表于 2022-2-8 16:28

加密算法都是通用的吧?
页: [1]
查看完整版本: 【N32G45XVL-STB_V1.1】开发板评测2: 硬件密码单元测试