
我试图用C把一个包含多行、以空白字符分隔的整数的文件解析成一个动态的"动态int数组"的数组。 文件中的每一行对应外层数组中的一个数组。 行数以及每行中的元素个数都不是固定的。



我以为我可以使用sscanf(因为fscanf可以用来解析整个以空白分隔的整数文件)。 但是,sscanf的行为似乎不同:sscanf只解析字符串中的第一个数字。 我猜这是因为该行是一个字符串而不是一个流。


/*
 * The question's original attempt, kept exactly as posted.
 *
 * Declarations: `line` receives the return value of fgets, `lineBuffer` holds
 * one line of input, `filePtr` is the input stream (it is not assigned anywhere
 * in this snippet), and `value` receives each parsed integer.
 *
 * Loop: reads the stream line by line with fgets, echoes each line, then calls
 * sscanf repeatedly on the line.
 *
 * NOTE: every sscanf call is handed the start of lineBuffer, so each call
 * re-parses the first integer. Nothing advances the scan position, which means
 * the inner loop never terminates when the line begins with an integer.
 */
char* line; char lineBuffer[BUFFER_SIZE]; FILE *filePtr; int value; 

 while((line = fgets(lineBuffer, BUFFER_SIZE, filePtr)) != NULL) { printf("%s\n", lineBuffer); while(sscanf(lineBuffer, "%d ", &value) > 0) { printf("%d\n", value); } } 

有什么函数可以用来这样解析字符串吗? 如果没有,有没有可以替代整个方案的其他办法? 我宁愿不使用正则表达式。

使用strtol():如果转换成功,它会给出一个指向已转换部分末尾的指针;再用一个char指针来保存当前的解析位置:

  /*
   * Reads the stream line by line and prints every integer found on each line.
   *
   * `p` tracks the current parse position inside the line. strtol stores in
   * `end` the address of the first character it did not consume; when no
   * conversion is possible (end of line or a non-numeric token) it leaves
   * `end == p`, which terminates the inner loop. strtol never advances past
   * the terminating NUL written by fgets, so no explicit buffer-bound check is
   * needed.
   *
   * Requires <errno.h>, <stdio.h> and <stdlib.h>.
   */
  while ((line = fgets(lineBuffer, BUFFER_SIZE, filePtr)) != NULL) {
      printf("%s\n", lineBuffer);

      char *p = lineBuffer;
      for (;;) {
          char *end;

          errno = 0;  /* strtol only sets errno on failure, so clear it first */
          long value = strtol(p, &end, 10);
          if (end == p) {
              break;  /* nothing converted: end of line or non-numeric text */
          }
          if (errno == ERANGE) {
              break;  /* value does not fit in a long; stop parsing this line */
          }

          printf("%ld\n", value);
          p = end;
      }
  }


两种方法:strtol()(错误处理更好)和sscanf()。

 /*
  * Reads the stream line by line and prints every int found on each line,
  * using strtol for conversion.
  *
  * The inner loop stops on the first of: no conversion possible, strtol
  * reporting an error through errno, or a value outside the range of int
  * (errno is then set to ERANGE). Afterwards trailing whitespace is skipped;
  * anything left before the terminating NUL is reported through
  * Handle_ExtraGarbageAtEndOfLine().
  */
 while ((line = fgets(lineBuffer, BUFFER_SIZE, filePtr)) != NULL) {
     char *stop;

     for (;;) {
         errno = 0;
         long parsed = strtol(line, &stop, 10);

         if (stop == line) {
             break;  /* no conversion */
         }
         if (errno != 0) {
             break;  /* out of range or other error */
         }
 #if LONG_MIN < INT_MIN || LONG_MAX > INT_MAX
         /* long and int may have different ranges */
         if (parsed < INT_MIN || parsed > INT_MAX) {
             errno = ERANGE;
             break;  /* out of range */
         }
 #endif
         int number = (int) parsed;
         printf("%d\n", number);
         line = stop;
     }

     while (isspace((unsigned char) *stop)) {
         stop++;
     }
     if (*stop != '\0') {
         Handle_ExtraGarbageAtEndOfLine();
     }
 }

“sscanf只解析字符串中的第一个数字。” 并非如此。 可以使用带有"%n"的sscanf()来记录扫描停止的位置。

 /*
  * Reads the stream line by line and prints every int found on each line,
  * using sscanf with "%n" to learn how many characters each call consumed.
  *
  * "%d" skips leading whitespace and converts one integer, the following " "
  * consumes any trailing whitespace, and "%n" stores the number of characters
  * read so far. Advancing `line` by that count makes the next call continue
  * where the previous one stopped.
  *
  * After the loop, remaining whitespace is skipped before testing for leftover
  * text, so that a blank or whitespace-only line is not reported as garbage.
  */
 while ((line = fgets(lineBuffer, BUFFER_SIZE, filePtr)) != NULL) {
     for (;;) {
         int value;
         int n = 0;

         if (sscanf(line, "%d %n", &value, &n) != 1) {
             break;  /* no further integer at this position */
         }
         printf("%d\n", value);
         line += n;
     }

     while (isspace((unsigned char) *line)) {
         ++line;
     }
     if (*line != '\0') {
         Handle_ExtraGarbageAtEndOfLine();
     }
 }

使用strtok()函数,以空白字符(例如空格" ")作为分隔符,并把它放在一个循环中逐个取出标记;当strtok()返回NULL时结束循环,然后把每个标记转换成数字并打印:

 /*
  * Reads the stream line by line, splits each line into tokens with strtok and
  * prints every token that sscanf can convert to an int.
  *
  * The delimiter set covers all whitespace characters rather than only ' ', so
  * integers separated by tabs are split into separate tokens too. With " "
  * alone, "1\t2" would be a single token and the 2 would be lost.
  *
  * Note: strtok modifies lineBuffer in place and keeps hidden static state, so
  * this loop must not be interleaved with another strtok sequence.
  */
 while ((line = fgets(lineBuffer, BUFFER_SIZE, filePtr)) != NULL) {
     printf("%s\n", lineBuffer);

     for (char *token = strtok(line, " \t\r\n\v\f");
          token != NULL;
          token = strtok(NULL, " \t\r\n\v\f")) {
         if (sscanf(token, "%d", &value) > 0) {
             printf("%d\n", value);
         }
     }
 }


 /*
  * Heap-allocates the line buffer (this form needs lineBuffer declared as a
  * char pointer). The size passed to malloc is the byte count itself:
  * sizeof(BUFFER_SIZE + 1) would instead give the size of the expression's
  * integer type (typically 4 bytes), and fgets would overflow the allocation.
  * The result must be checked for NULL before use and released with free().
  */
 lineBuffer = malloc(BUFFER_SIZE + 1);


 /* Fixed-size line buffer: an array of BUFFER_SIZE chars, no heap allocation involved. */
 char lineBuffer[BUFFER_SIZE]; 


只需在输入行上使用一个循环,并利用atol()总会在下一个空白分隔符处停止这一点。 第一个版本仅适用于正整数 ;) 但它速度很快,您无需阅读大量的strtok和sscanf文档,而且即使整数之间存在“噪音”,它也依然健壮。

 /*
  * Prints every run of decimal digits in a fixed sample string as a number,
  * one per line. Non-digit characters (including '-') are treated as
  * separators, so only non-negative values are produced.
  */
 void bla(void)
 {
     const char *input = " 1 3 4 6 ";
     size_t len = strlen(input);

     for (size_t i = 0; i < len; ++i) {
         /* <ctype.h> functions require a value representable as unsigned char */
         if (isdigit((unsigned char) input[i])) {
             /* atol returns long, so the format must be %ld */
             printf("%ld\n", atol(&input[i]));
             /* skip the remaining digits of the number just printed */
             while (i < len && isdigit((unsigned char) input[i])) {
                 ++i;
             }
         }
     }
 }

 /*
  * Positive and negative ints version: prints every whitespace-delimited token
  * of a fixed sample string as a number, one per line. atol consumes an
  * optional sign and leading digits; a token that does not start with a number
  * prints as 0.
  */
 void bla1(void)
 {
     const char *input = " 10 -3 42 6 ";
     size_t len = strlen(input);

     for (size_t i = 0; i < len; ++i) {
         if (!isspace((unsigned char) input[i])) {
             printf("%ld\n", atol(&input[i]));
             /* skip to the end of the current token */
             while (i < len && !isspace((unsigned char) input[i])) {
                 ++i;
             }
         }
     }
     /* Output:
        10
        -3
        42
        6
     */
 }

问题(隐含)的下一部分是:如何用动态数组来存储解析出来的int值。 下面是一个基于上面代码的解决方案。 chunkSize被故意设置得比输入小,这样我就可以测试realloc那部分代码是否也能正常工作。

 /* One parsed row: a heap-allocated array of values plus its element count. */
 typedef struct DataRow_tag
 {
     int32_t *data;  /* heap array of parsed values, or NULL when empty/unset */
     size_t length;  /* number of valid entries in data */
 } DataRow_t;

 /*
  * Parses one row of whitespace-separated integers into a growable int32_t
  * array.
  *
  * Parameters:
  *   row    - zero-terminated string containing one row worth of data.
  *   result - where the data is stored. Must be "clean" on entry
  *            (result->data == NULL); this function does not free a previous
  *            buffer.
  *
  * Returns an int used as a C-style boolean (the original author deliberately
  * avoided <stdbool.h> for C/C++ interoperability reasons):
  *   1 - success. result->data points to a malloc'ed array holding
  *       result->length values. The caller takes ownership and must free() it,
  *       even when length is 0 (empty or whitespace-only row).
  *   0 - failure (invalid arguments, out of memory, or a value outside the
  *       int32_t range). result->data is NULL and result->length is 0 unless
  *       result itself was invalid.
  *
  * Each whitespace-delimited token contributes exactly one value: its leading
  * integer, or 0 when the token does not start with a number. This matches
  * the atol-based behavior of the earlier version.
  */
 int ReadRowWithUnknownNumberOfColumnsOfInt32
     ( const char *row      /* Zero terminated string containing 1 row worth of data. */
     , DataRow_t *result    /* Pointer to the place the data will be stored at. */
     )
 {
     /* Initial capacity; deliberately small so the growth path gets exercised.
        Set this to something most likely large enough for your application. */
     size_t capacity = 10;

     /* The caller must hand in a clean result structure. */
     assert(NULL != result && NULL == result->data);
     if (NULL == row || NULL == result || NULL != result->data)
         return 0;

     result->length = 0;
     result->data = malloc(capacity * sizeof *result->data);
     if (NULL == result->data)
         return 0;

     /* Walk up to the terminating NUL so the final character is examined too;
        stopping one character early would drop a one-digit last token. */
     const char *pInput = row;
     while (*pInput != '\0')
     {
         if (isspace((unsigned char) *pInput))
         {
             ++pInput;
             continue;
         }

         /* long long is at least 64 bits wide, so a clamped overflow result
            from strtoll always lands outside the int32_t range checked below. */
         char *pTokenEnd;
         long long value = strtoll(pInput, &pTokenEnd, 10);
         if (value < INT32_MIN || value > INT32_MAX)
             goto fail;

         if (result->length == capacity)
         {
             /* Buffer full: double it. realloc's result goes to a temporary so
                the original block stays reachable if the call fails. */
             if (capacity > SIZE_MAX / 2 / sizeof *result->data)
                 goto fail;
             capacity *= 2;
             int32_t *grown = realloc(result->data, capacity * sizeof *result->data);
             if (NULL == grown)
                 goto fail;
             result->data = grown;
         }
         result->data[result->length++] = (int32_t) value;

         /* Skip whatever remains of the current token. */
         pInput = pTokenEnd;
         while (*pInput != '\0' && !isspace((unsigned char) *pInput))
             ++pInput;
     }
     return 1;

 fail:
     /* Single cleanup path: the buffer is freed exactly once and result is
        left in a well-defined empty state. */
     free(result->data);
     result->data = NULL;
     result->length = 0;
     return 0;
 }

 /* Demo: parses a sample row, prints the values on one line, frees the row. */
 void Bla2()
 {
     const char *input = "-10 -9 -8 -7 -6 -5 -4 -3 -2 -1 0 1 2 3 4 5 6 7 8 9 10 11 12 13";
     DataRow_t dataRow = { 0 };
     if (ReadRowWithUnknownNumberOfColumnsOfInt32(input, &dataRow))
     {
         for (size_t i = 0; i < dataRow.length; ++i)
         {
             /* cast so the argument matches %d regardless of how int32_t is defined */
             printf("%d ", (int) dataRow.data[i]);
         }
         printf("\n");
         free(dataRow.data);
         dataRow.data = NULL;
         dataRow.length = 0;
     }
 }