读取c中由tab分隔的文件

我是C的新手，读取文件这件事快把我逼疯了…我想读取一个包含姓名、出生地和电话号码等内容的文件。所有这些字段都是用制表符分隔的

格式可能是这样的：

Bob Jason	Los Angeles	33333333
Alice Wong	Washington DC	111-333-222

所以我创建了一个结构来记录它。

 /* One record of the tab-separated file; every field is a fixed-size character buffer. */
 struct Person
 {
     char name[20];      /* full name, e.g. "Bob Jason" */
     char address[30];   /* place, e.g. "Los Angeles" */
     char phone[20];     /* phone number kept as text */
 };

 typedef struct Person Person;

我尝试了很多方法将这个文件读入struct，但都失败了。我试过：

 /*
  * The asker's first attempt (shown as the failing example).
  * fread() copies raw bytes from the stream into &temp; it does not look for
  * tabs or newlines. The call also requests up to 100 items of sizeof(Person)
  * bytes each, although temp is a single Person.
  */
 read_file = fopen("read.txt", "r");
 Person temp;
 fread(&temp, sizeof(Person), 100, read_file);
 printf("%s %s %s \n", temp.name, temp.address, temp.phone);

但是char字符串没有记录到由tab分隔的temp中，它将整个文件读入temp.name并获得奇怪的输出。

然后我尝试了fscanf和sscanf，它们也都无法按制表符进行分隔

 /* The asker's second attempt: each %s stops at the first whitespace character,
    so a field such as "Bob Jason" is split into two tokens. */
 fscanf(read_file, "%s %s %s", temp.name, temp.address, temp.phone);

或者

 /* The asker's third attempt: a \t inside a scanf format matches any run of
    whitespace, and %s still stops at spaces, so this behaves like the
    space-separated format. */
 fscanf(read_file, "%s\t%s\t%s", temp.name, temp.address, temp.phone);

这会按空格把字符串分开，所以我分别得到Bob和Jason，而实际上，我需要将“Bob Jason”作为一个字符串。在创建文本文件时，我确实是用制表符分隔这些字段的。

对于sscanf来说，我多次尝试过不同的方式……

请帮忙…

我建议：

使用fgets逐行读取文本。
使用制表符作为分隔符，使用strtok分隔行的内容。

 // Use an appropriate number for LINE_SIZE #define LINE_SIZE 200 char line[LINE_SIZE]; if ( fgets(line, sizeof(line), read_file) == NULL ) { // Deal with error. } Person temp; char* token = strtok(line, "\t"); if ( token == NULL ) { // Deal with error. } else { // Copy token at most the number of characters // temp.name can hold. Similar logic applies to address // and phone number. temp.name[0] = '\0'; strncat(temp.name, token, sizeof(temp.name)-1); } token = strtok(NULL, "\t"); if ( token == NULL ) { // Deal with error. } else { temp.address[0] = '\0'; strncat(temp.address, token, sizeof(temp.address)-1); } token = strtok(NULL, "\n"); if ( token == NULL ) { // Deal with error. } else { temp.phone[0] = '\0'; strncat(temp.phone, token, sizeof(temp.phone)-1); }

更新

使用辅助函数，可以减小代码的大小。（谢谢@chux）

 // The helper function. void copyToken(char* destination, char* source, size_t maxLen; char const* delimiter) { char* token = strtok(source, delimiter); if ( token != NULL ) { destination[0] = '\0'; strncat(destination, token, maxLen-1); } } // Use an appropriate number for LINE_SIZE #define LINE_SIZE 200 char line[LINE_SIZE]; if ( fgets(line, sizeof(line), read_file) == NULL ) { // Deal with error. } Person temp; copyToken(temp.name, line, sizeof(temp.name), "\t"); copyToken(temp.address, NULL, sizeof(temp.address), "\t"); copyToken(temp.phone, NULL, sizeof(temp.phone), "\n");

这仅用于演示，有更好的方法来初始化变量，但为了说明您的主要问题，即读取由制表符分隔的文件 ，您可以编写如下函数：

假设有一个严格的字段定义和你的结构定义，你可以使用strtok()来获取标记。

 //for a file with constant field definitions void GetFileContents(char *file, PERSON *person) { char line[260]; FILE *fp; char *buf=0; char temp[80]; int i = -1; fp = fopen(file, "r"); while(fgets(line, 260, fp)) { i++; buf = strtok(line, "\t\n"); if(buf) strcpy(person[i].name, buf); buf = strtok(NULL, "\t\n"); if(buf) strcpy(person[i].address, buf); buf = strtok(NULL, "\t\n"); if(buf) strcpy(person[i].phone, buf); //Note: if you have more fields, add more strtok/strcpy sections //Note: This method will ONLY work for consistent number of fields. //If variable number of fields, suggest 2 dimensional string array. } fclose(fp); }

在main（）中调用它，如下所示：

 int main(void) { //... PERSON person[NUM_LINES], *pPerson; //NUM_LINES defined elsewhere //and there are better ways //this is just for illustration pPerson = &person[0];//initialize pointer to person GetFileContents(filename, pPerson); //call function to populate person. //... return 0; }

第一件事，

 /* The call under discussion: a raw byte read into temp that requests 100
    items of sizeof(temp) bytes each and performs no field splitting. */
 fread(&temp, sizeof(temp), 100, read_file);

因为字段不是固定宽度的，所以这样做行不通：它总是为name读取20个字符、为address读取30个字符，依此类推，而这并不总是正确的。

你需要一次读取一行，然后解析该行。你可以使用任何你喜欢的方法来读取一行，一个简单的方法就是像这样使用fgets()：

 char line[100];
 Person persons[100];
 int index;

 index = 0;
 /* Stop after 100 records so persons[] is never written out of bounds.
    Note that fgets keeps the trailing '\n' in line. */
 while ((index < 100) && (fgets(line, sizeof(line), read_file) != NULL))
 {
     persons[index++] = parseLineAndExtractPerson(line);
 }

现在我们需要一个函数来解析行并将数据存储在Person结构实例中

 /*
  * Copies the text of `line` up to (not including) the first tab into
  * `buffer`, truncating it to bufferLength - 1 characters, and always
  * null-terminates the result.
  *
  * Returns a pointer to the character after the tab, i.e. the start of the
  * next field, or NULL when line/buffer is NULL, bufferLength is 0, or the
  * line contains no further tab (this was the last field).
  */
 char *extractToken(const char *const line, char *buffer, size_t bufferLength)
 {
     char *pointer;
     size_t length;

     if ((line == NULL) || (buffer == NULL) || (bufferLength == 0))
         return NULL;

     pointer = strpbrk(line, "\t");
     if (pointer == NULL)
         length = strlen(line);
     else
         length = (size_t) (pointer - line);

     if (length >= bufferLength) /* truncate the string if it was too long */
         length = bufferLength - 1;
     memcpy(buffer, line, length);
     buffer[length] = '\0';

     /* No tab left: report "no next field" instead of computing NULL + 1. */
     if (pointer == NULL)
         return NULL;
     return pointer + 1;
 }

 /*
  * Splits one tab-separated line into a Person. Fields that are missing from
  * the line are left as empty strings, because extractToken returns NULL
  * after the last field and ignores a NULL line.
  */
 Person parseLineAndExtractPerson(const char *line)
 {
     Person person;

     person.name[0] = '\0';
     person.address[0] = '\0';
     person.phone[0] = '\0';

     line = extractToken(line, person.name, sizeof(person.name));
     line = extractToken(line, person.address, sizeof(person.address));
     line = extractToken(line, person.phone, sizeof(person.phone));

     return person;
 }

这是一个循环的示例实现，最多可读取100条记录

 /* Reads at most 100 records from the file and prints them, last record first. */
 int main(void)
 {
     Person persons[100];
     char line[100];
     int index = 0;
     FILE *read_file = fopen("/path/to/the/file.type", "r");

     if (read_file == NULL)
         return -1;

     while (index < 100)
     {
         size_t length;

         if (fgets(line, sizeof(line), read_file) == NULL)
             break;

         /* remove the '\n' left by `fgets()'. */
         length = strlen(line);
         if ((length > 0) && (line[length - 1] == '\n'))
             line[length - 1] = '\0';

         persons[index] = parseLineAndExtractPerson(line);
         index++;
     }
     fclose(read_file);

     for (index = index - 1; index >= 0; index--)
         printf("%s: %s, %s\n", persons[index].name, persons[index].address, persons[index].phone);

     return 0;
 }

这是一个完整的程序，可以完成我认为你需要的程序

 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>

 /* One record of the tab-separated file. */
 typedef struct Person
 {
     char name[20];
     char address[30];
     char phone[20];
 } Person;

 /*
  * Copies the text of `line` up to (not including) the first tab into
  * `buffer`, truncated to bufferLength - 1 characters and null-terminated.
  *
  * Returns a pointer to the start of the next field, or NULL when
  * line/buffer is NULL, bufferLength is 0, or no further tab exists.
  */
 char *extractToken(const char *const line, char *buffer, size_t bufferLength)
 {
     char *pointer;
     size_t length;

     if ((line == NULL) || (buffer == NULL) || (bufferLength == 0))
         return NULL;

     pointer = strpbrk(line, "\t");
     if (pointer == NULL)
         length = strlen(line);
     else
         length = (size_t) (pointer - line);

     if (length >= bufferLength) /* truncate the string if it was too long */
         length = bufferLength - 1;
     memcpy(buffer, line, length);
     buffer[length] = '\0';

     /* Last field: there is no next field to point at. */
     if (pointer == NULL)
         return NULL;
     return pointer + 1;
 }

 /* Splits one tab-separated line into a Person; missing fields stay empty. */
 Person parseLineAndExtractPerson(const char *line)
 {
     Person person;

     person.name[0] = '\0';
     person.address[0] = '\0';
     person.phone[0] = '\0';

     line = extractToken(line, person.name, sizeof(person.name));
     line = extractToken(line, person.address, sizeof(person.address));
     line = extractToken(line, person.phone, sizeof(person.phone));

     return person;
 }

 /* Reads at most 100 records and prints them, last record first. */
 int main(void)
 {
     char line[100];
     Person persons[100];
     int index;
     FILE *read_file;

     read_file = fopen("/home/iharob/data.dat", "r");
     if (read_file == NULL)
         return -1;

     index = 0;
     /* Bound the loop so persons[] cannot overflow on long files. */
     while ((index < 100) && (fgets(line, sizeof(line), read_file) != NULL))
     {
         size_t length;

         /* remove the '\n' left by fgets(); guard against an empty string */
         length = strlen(line);
         if ((length > 0) && (line[length - 1] == '\n'))
             line[length - 1] = '\0';
         persons[index++] = parseLineAndExtractPerson(line);
     }
     fclose(read_file);

     while (--index >= 0)
         printf("%s: %s, %s\n", persons[index].name, persons[index].address, persons[index].phone);

     return 0;
 }

解析由fgets返回的字符串可能非常烦人，尤其是在输入被截断时。事实上，fgets还有很多不足之处。你得到的是完整的字符串，还是后面还有更多内容？末尾有换行符吗？就此而言，行尾是在20个字节之外，还是在32768个字节之外？如果不必把这么多字节数两遍就好了——一遍由fgets完成，一遍由strlen完成，而这只是为了删除你不想要的换行符。

像fscanf这样的函数在这种情况下不一定按预期工作，除非你使用C99的“scanset”（扫描集）功能，而且只要缓冲区空间足够，它会自动添加空终止符。在判断成功还是失败时，scanf系列函数的返回值是你的好帮手。

您可以通过使用%NNc（其中NN是宽度）来避免空终止符，但如果这NN个字节中含有\t，那么您需要把它之后的内容分离出来并移到下一个字段，而这又意味着下一个字段中的字节必须随之移到再下一个字段，第90个字段的字节需要移到第91个字段……并且希望您只需要这样做一次……显然，这实际上也不是解决方案。

鉴于这些原因，我觉得一直读取到遇到预期的分隔符为止会更容易，并且当指定的大小刚好能填满缓冲区、却放不下空终止符时，由你来决定函数的行为。无论如何，这是代码。我认为这很简单：

 /*
  * Read a token.
  *
  * tok:     The buffer used to store the token.
  * max:     The maximum number of characters to store in the buffer.
  * delims:  A string containing the individual delimiter bytes.
  * fileptr: The file pointer to read the token from.
  *
  * Return value:
  * - max: The buffer is full. In this case, the string _IS NOT_ null terminated.
  *        This may or may not be a problem: it's your choice.
  * - (size_t)-1: End of file or an I/O error occurred before a delimiter was
  *        seen (just like with `fgets`, use `feof`/`ferror` to tell which).
  *        The string is not null terminated in this case either.
  * - any other value: The length of the token as `strlen` would return.
  *        In this case, the string _IS_ null terminated and the delimiter
  *        has been consumed from the stream.
  */
 size_t read_token(char *restrict tok, size_t max, const char *restrict delims, FILE *restrict fileptr)
 {
     int c = 0;   /* initialised so the EOF test below is defined when max == 0 */
     size_t n;

     /* Read from the caller's stream, not from stdin. */
     for (n = 0; n < max && (c = getc(fileptr)) != EOF && strchr(delims, c) == NULL; ++n)
         *tok++ = (char) c;

     if (c == EOF)
         return (size_t) -1;

     if (n == max)
         return max;

     *tok = 0;
     return n;
 }

用法非常简单：

 #include <stdio.h>
 #include <stdlib.h>

 typedef struct person
 {
     char name[20];
     char address[30];
     char phone[20];
 } Person;

 /* The token reader; its definition is the read_token function shown with this program. */
 size_t read_token(char *restrict tok, size_t max, const char *restrict delims, FILE *restrict fileptr);

 /*
  * Reads read.txt record by record (name TAB address TAB phone NEWLINE) and
  * prints each complete record. A record with a field too long for its
  * buffer is reported and skipped.
  */
 int main(void)
 {
     FILE *read_file;
     Person temp;
     size_t line_num;
     size_t len;
     int c;
     int exit_status = EXIT_SUCCESS;

     read_file = fopen("read.txt", "r");
     if (read_file == NULL)
     {
         fprintf(stderr, "Error opening read.txt\n");
         return 1;
     }

     for (line_num = 0;; ++line_num)
     {
         /*
          * Used for detecting early EOF
          * (eg the last line contains only a name).
          */
         temp.name[0] = temp.phone[0] = 0;

         len = read_token(temp.name, sizeof(temp.name), "\t", read_file);
         if (len == (size_t)-1)
             break;
         if (len == sizeof(temp.name))
         {
             fprintf(stderr, "Skipping bad line %zu\n", line_num + 1);
             /* discard the rest of this line from the same file */
             while ((c = getc(read_file)) != EOF && c != '\n')
                 ; /* nothing */
             continue;
         }

         len = read_token(temp.address, sizeof(temp.address), "\t", read_file);
         if (len == (size_t)-1)
             break;
         if (len == sizeof(temp.address))
         {
             fprintf(stderr, "Skipping bad line %zu\n", line_num + 1);
             while ((c = getc(read_file)) != EOF && c != '\n')
                 ; /* nothing */
             continue;
         }

         /* The phone number is the last field, so it ends at the newline. */
         len = read_token(temp.phone, sizeof(temp.phone), "\n", read_file);
         if (len == (size_t)-1)
             break;
         if (len == sizeof(temp.phone))
         {
             fprintf(stderr, "Skipping bad line %zu\n", line_num + 1);
             while ((c = getc(read_file)) != EOF && c != '\n')
                 ; /* nothing */
             continue;
         }

         // Do something with the input here. Example:
         printf("Entry %zu:\n"
                "\tName: %.*s\n"
                "\tAddress: %.*s\n"
                "\tPhone: %.*s\n\n",
                line_num + 1,
                (int)sizeof(temp.name), temp.name,
                (int)sizeof(temp.address), temp.address,
                (int)sizeof(temp.phone), temp.phone);
     }

     if (ferror(read_file))
     {
         fprintf(stderr, "error reading from file\n");
         exit_status = EXIT_FAILURE;
     }
     else if (feof(read_file) && temp.phone[0] == 0 && temp.name[0] != 0)
     {
         fprintf(stderr, "Unexpected end of file while reading entry %zu\n", line_num + 1);
         exit_status = EXIT_FAILURE;
     }
     //else feof(read_file) is still true, but we parsed a full entry/record

     fclose(read_file);
     return exit_status;
 }

注意到在读取循环中，处理read_token返回值的完全相同的8行代码出现了多次吗？因此，我认为还可以再写一个函数来调用read_token并处理其返回值，让main只需调用这个“read_token处理程序”；不过我认为上面的代码已经让您大致了解了如何使用read_token以及它如何适用于您的情况。如果您愿意，可以以某种方式改变其行为，但在处理这类带分隔符的输入时，上面的read_token函数对我来说很合适（如果再加入带引号的字段，事情会复杂一点，但据我所知也不会复杂太多）。您可以决定返回max时该怎么处理。我选择将其视为错误，但您可能有不同的想法。您甚至可以在n == max时额外调用一次getchar，把n == max视为成功的返回值，并用(size_t)-2作为“令牌太大”的错误指示。

读取c中由tab分隔的文件

C ++数组分配错误：无效的数组赋值

使用FFMPEG从IP摄像机读取RTCP数据包

如果’＆’没有放入’scanf’语句会怎么样？

我可以在没有Visual Studio的情况下使用Visual C ++编译器吗？

在ASM中，c编译器如何处理函数的结构返回值

处理char缓冲区

strcat实现

C中的OpenSSL“Seal”（或通过shell）

有人有两次包含stdlib.h的原因吗？

Numpy C-Api示例给出了SegFault