如何在C中将hex字符串转换为二进制字符串

我有一个hex值的文本文件。 现在我需要将hex值转换为二进制,并需要将其保存在另一个文件中。 但我不知道如何将hex值转换为二进制字符串! 请帮忙…

/*
 * Convert a one-byte hex string (e.g. "a9") into an 8-character ASCII
 * binary string (e.g. "10101001").
 *
 * Fixes over the original snippet:
 *  - res must start as an empty string before strcat() is used; the
 *    original only set res[8], leaving res[0] uninitialized (UB),
 *  - the comparison must be t <= v, not t < v, otherwise a set bit whose
 *    weight equals the remaining value is reported as 0 (e.g. "80"),
 *  - the comment said "s^(n-1)"; the bit weight is 2^(n-1).
 *
 * res must have room for 9 chars (8 digits + NUL).
 */
void hex_to_binary8(const char *input, char res[9])
{
    res[0] = '\0';                      /* strcat appends to an empty string */
    int v = (int)strtol(input, 0, 16);  /* parse the hex value */
    for (int t = 128; t; t /= 2) {      /* t = 2^(n-1) .. 1 for n = 8 bits */
        strcat(res, t <= v ? "1" : "0");
        if (t <= v)
            v -= t;
    }
    /* res now contains the binary representation of the number */
}

作为替代方案(假设没有像"0x3A"那样的前缀):

 const char binary[16][5] = {"0000", "0001", "0010", "0011", "0100", ...}; const char digits = "0123456789abcdef"; const char input[] = "..." // input value char res[1024]; res[0] = '\0'; int p = 0; while(input[p]) { const char *v = strchr(digits, tolower(input[p++])); if (v) strcat(res, binary[v - digits]); } // res now contains the binary representation of the number 

真的很简单,因为翻译是逐位的。

 0 - 0000 1 - 0001 2 - 0010 3 - 0011 4 - 0100 5 - 0101 6 - 0110 7 - 0111 8 - 1000 9 - 1001 A - 1010 B - 1011 C - 1100 D - 1101 E - 1110 F - 1111 

因此,例如,hex数FE2F8将是二进制的11111110001011111000

解决这个问题的方法有很多，大多使用一些算术运算把 ASCII 字符（范围 0-9 和 a-f，或 A-F）转换为二进制。我想找到一个只使用查找表的解决方案，并将它与使用算术运算的方案做基准对比。遗憾的是，上面的答案都没有实现纯查找表的解决方案，有些答案甚至假设“转换为二进制”意味着转换为由 ASCII 字符“0”和“1”组成的字符串。

让我们先做一些设置。 首先，我们希望将整个测试数据放在内存中，以避免磁盘 I/O 影响测试结果。 下面展示了我如何生成一个头文件，其中包含一个 104857600 字节（约 105 MB）的字符数组 "testdata"。 由于问题是如何转换文件，我们的实现应该能快速处理大量数据。

 $ { printf "char *testdata =\""; cat /dev/urandom \ | tr -d -c "0123456789abcdefABCDEF" \ | dd count=100 iflag=fullblock bs=1M; printf "\";\n" } > testdata.h 

接下来，我们创建查找表。 我看到两种可能的方法来使用查找表解决这个问题。 查找表要么将单个 ASCII hex 字符映射到半字节（nibble），要么将两个 hex 字符一次映射到完整的一个字节。 在前一种情况下，查找表必须有 256 个条目。 在后一种情况下，查找表必须有 256 * 256 = 65536 个条目。 我们可以利用“两个字节中第二个字节的最高位永远不会被置位（ASCII hex 字符的编码都小于 128）”这一事实来减小后者的大小，这样只需要一个 128 * 256 = 32768 条目的查找表。 由于该方案需要一个额外的计算步骤（应用位掩码），我们将对两者都进行基准测试。 我们最终得到以下测试用例：

  1. 算术解
  2. 256个条目查找表
  3. 32768个条目查找表
  4. 65536个条目查找表

使用一些 Python 很容易生成这些查找表：

 #!/usr/bin/env python import sys,struct sys.stdout.write("unsigned char base16_decoding_table1[256] = {\n") for i in xrange(256): try: j = str(int(chr(i), 16)) except: j = '0' sys.stdout.write(j+',') sys.stdout.write("};\n") sys.stdout.write("\n") l = 128*256*["0"] for a in ['0','1','2','3','4','5','6','7','8','9','a','b','c','d','e','f','A','B','C','D','E','F']: for b in ['0','1','2','3','4','5','6','7','8','9','a','b','c','d','e','f','A','B','C','D','E','F']: l[struct.unpack(" 70: sys.stdout.write(line+"\n") line = "" sys.stdout.write(line+"};\n") sys.stdout.write("\n") l = 256*256*["0"] for a in ['0','1','2','3','4','5','6','7','8','9','a','b','c','d','e','f','A','B','C','D','E','F']: for b in ['0','1','2','3','4','5','6','7','8','9','a','b','c','d','e','f','A','B','C','D','E','F']: l[struct.unpack(" 70: sys.stdout.write(line+"\n") line = "" sys.stdout.write(line+"};\n") 

然后:

 python gen.py > base16_decoding_table.h 

现在我们可以编写一些C代码进行测试。

 #include  #include  #include  #include "testdata.h" #include "base16_decoding_table.h" #define TESTDATALEN 104857600 /* the resulting binary string is half the size of the input hex string * because every two hex characters map to one byte */ unsigned char result[TESTDATALEN/2]; void test1() { size_t i; char cur; unsigned char val; for (i = 0; i < TESTDATALEN; i++) { cur = testdata[i]; if (cur >= 97) { val = cur - 97 + 10; } else if (cur >= 65) { val = cur - 65 + 10; } else { val = cur - 48; } /* even characters are the first half, odd characters the second half * of the current output byte */ if (i%2 == 0) { result[i/2] = val << 4; } else { result[i/2] |= val; } } } void test2() { size_t i; char cur; unsigned char val; for (i = 0; i < TESTDATALEN; i++) { cur = testdata[i]; val = base16_decoding_table1[(int)cur]; /* even characters are the first half, odd characters the second half * of the current output byte */ if (i%2 == 0) { result[i/2] = val << 4; } else { result[i/2] |= val; } } } void test3() { size_t i; uint16_t *cur; unsigned char val; for (i = 0; i < TESTDATALEN; i+=2) { cur = (uint16_t*)(testdata+i); // apply bitmask to make sure that the first bit is zero val = base16_decoding_table2[*cur & 0x7fff]; result[i/2] = val; } } void test4() { size_t i; uint16_t *cur; unsigned char val; for (i = 0; i < TESTDATALEN; i+=2) { cur = (uint16_t*)(testdata+i); val = base16_decoding_table3[*cur]; result[i/2] = val; } } #define NUMTESTS 1000 int main() { struct timespec before, after; unsigned long long checksum; int i; double elapsed; clock_gettime(CLOCK_MONOTONIC, &before); for (i = 0; i < NUMTESTS; i++) { test1(); } clock_gettime(CLOCK_MONOTONIC, &after); checksum = 0; for (i = 0; i < TESTDATALEN/2; i++) { checksum += result[i]; } printf("checksum: %llu\n", checksum); elapsed = difftime(after.tv_sec, before.tv_sec) + (after.tv_nsec - before.tv_nsec)/1.0e9; printf("arithmetic solution took %f seconds\n", elapsed); clock_gettime(CLOCK_MONOTONIC, &before); for (i = 0; 
i < NUMTESTS; i++) { test2(); } clock_gettime(CLOCK_MONOTONIC, &after); checksum = 0; for (i = 0; i < TESTDATALEN/2; i++) { checksum += result[i]; } printf("checksum: %llu\n", checksum); elapsed = difftime(after.tv_sec, before.tv_sec) + (after.tv_nsec - before.tv_nsec)/1.0e9; printf("256 entries table took %f seconds\n", elapsed); clock_gettime(CLOCK_MONOTONIC, &before); for (i = 0; i < NUMTESTS; i++) { test3(); } clock_gettime(CLOCK_MONOTONIC, &after); checksum = 0; for (i = 0; i < TESTDATALEN/2; i++) { checksum += result[i]; } printf("checksum: %llu\n", checksum); elapsed = difftime(after.tv_sec, before.tv_sec) + (after.tv_nsec - before.tv_nsec)/1.0e9; printf("32768 entries table took %f seconds\n", elapsed); clock_gettime(CLOCK_MONOTONIC, &before); for (i = 0; i < NUMTESTS; i++) { test4(); } clock_gettime(CLOCK_MONOTONIC, &after); checksum = 0; for (i = 0; i < TESTDATALEN/2; i++) { checksum += result[i]; } printf("checksum: %llu\n", checksum); elapsed = difftime(after.tv_sec, before.tv_sec) + (after.tv_nsec - before.tv_nsec)/1.0e9; printf("65536 entries table took %f seconds\n", elapsed); return 0; } 

让我们编译一下:

 $ gcc -O3 -g -Wall -Wextra test.c 

并运行它:

 $ ./a.out 

结果:

  1. 算术解: 437.17 秒
  2. 256个条目查找表: 117.80 秒
  3. 32768个条目查找表: 52.33 秒
  4. 65536个条目查找表: 44.66 秒

这样我们就可以得出结论：查找表总能胜过算术解决方案；而为巨大的查找表多付出的内存，换来的运行时间节省很可能是值得的。

 void hex_binary(char * res){ char binary[16][5] = {"0000", "0001", "0010", "0011", "0100", "0101","0110", "0111", "1000", "1001", "1010", "1011", "1100", "1101", "1110","1111"}; char digits [] = "0123456789abcdef"; const char input[] = "a9e6"; // input value res[0] = '\0'; int p = 0; int value =0; while(input[p]) { const char *v = strchr(digits, tolower(input[p])); if(v[0]>96){ value=v[0]-87; } else{ value=v[0]-48; } if (v){ strcat(res, binary[value]); } p++; } printf("Res:%s\n", res); } 
 void printBin(unsigned int num){ char str[sizeof(num)*8]; char *p = str; for(*p='0'; num; num/=2) { *p++='0'+num%2; } //store remainders for(--p; p>=str; putchar(*p--)) {;} //print remainders in reverse putchar('\n'); } 

最快最简单的方法是读取hex文件,并且对于每个字符(’0’到’F’)读取,执行等效(0到15)二进制值的表查找。 像往常一样,有更优雅的方式,但这非常简单,可能是这样的:

 switch (charval) { case '0': binval = 0; case '1': binval = 1; case '2': binval = 2; case '3': binval = 3; .... case 'a': binval = 10; case 'b': binval = 11; case 'A': binval = 10; case 'B': binval = 11; .... case 'f': binval = 15; case 'F': binval = 15; default: binval = -1; // error case } 

现在,您必须使用移位和IOR / ADD从这些单独的4位二进制值构造所需大小的单词。

这是我将HEX转换为BIN的函数,字节为一个字节。

 void HexToBin(char hex_number, char* bit_number) { int max = 128; for(int i = 7 ; i >-1 ; i--){ bit_number [i] = (hex_number & max ) ? 1 : 0; max >>=1; } } 

以及对函数的调用:

 void main (void){ char hex_number = 0x6E; //0110 1110 char bit_number[8]={0,0,0,0,0,0,0,0}; HexToBin(hex_number,bit_number); for(int i = 7 ; i >-1 ; i--) printf("%d",bit_number[i]); printf("\n"); system("pause"); } 

这是程序在 MS-DOS 控制台下的输出：

 01101110 Press a key to continue . . . 

很容易!

 #include  int main() { long int binaryNumber, hexadecimalNumber = 0, j = 1, remainder; printf("Enter any number any binary number: "); scanf("%ld", &binaryNumber); while(binaryNumber != 0) { remainder = binaryNumber % 10; hexadecimalNumber = hexadecimalNumber + remainder * j; j = j * 2; binaryNumber = binaryNumber / 10; } printf("Equivalent hexadecimal value: %X", hexadecimalNumber); return 0; }