在ANSI C中计算文本文件中的行和单词的最快方法是什么?

在纯ANSI C中计算文本文件中的行和单词的最快方法是什么?

单词以空格或句点终止。 行以'\n'结尾。

这里似乎有一个用C++实现的版本。

也许看一下GNU wc实用程序的源代码,因为这个实用程序正是你想要的。

 /* Sample implementation of the wc utility: for every file named on the
    command line, print its line, word and character counts, followed by
    a grand total when more than one file was given. */

 #include <ctype.h>
 #include <errno.h>
 #include <stdarg.h>
 #include <stdio.h>
 #include <stdlib.h>

 typedef unsigned long count_t;  /* Counter type */

 /* Current file counters: chars, words, lines */
 count_t ccount;
 count_t wcount;
 count_t lcount;

 /* Totals counters: chars, words, lines */
 count_t total_ccount = 0;
 count_t total_wcount = 0;
 count_t total_lcount = 0;

 /* Print the message described by FMT and AP on stderr and exit with
    status 1.  If PERR is not 0, the message is followed by the text for
    the errno value that was current when the function was entered;
    otherwise it is followed by a newline. */
 static void
 error_print (int perr, const char *fmt, va_list ap)
 {
   /* vfprintf is allowed to modify errno, so remember the caller's value. */
   int saved_errno = errno;

   vfprintf (stderr, fmt, ap);
   if (perr)
     {
       errno = saved_errno;
       perror (" ");
     }
   else
     fprintf (stderr, "\n");
   exit (1);
 }

 /* Print error message and exit with error status. */
 static void
 errf (const char *fmt, ...)
 {
   va_list ap;

   va_start (ap, fmt);
   error_print (0, fmt, ap);
   va_end (ap);  /* not reached: error_print exits */
 }

 /* Print error message followed by errno status and exit with error code. */
 static void
 perrf (const char *fmt, ...)
 {
   va_list ap;

   va_start (ap, fmt);
   error_print (1, fmt, ap);
   va_end (ap);  /* not reached: error_print exits */
 }

 /* Output counters for given file, in the order lines, words, chars. */
 void
 report (const char *file, count_t ccount, count_t wcount, count_t lcount)
 {
   printf ("%6lu %6lu %6lu %s\n", lcount, wcount, ccount, file);
 }

 /* Return true if C is a valid word constituent (an alphabetic character
    according to isalpha). */
 static int
 isword (unsigned char c)
 {
   return isalpha (c);
 }

 /* Increase character and, if necessary, line counters.  Wrapped in
    do/while (0) so that it behaves as a single statement everywhere. */
 #define COUNT(c)        \
   do                    \
     {                   \
       ccount++;         \
       if ((c) == '\n')  \
         lcount++;       \
     }                   \
   while (0)

 /* Get next word from the input stream, updating ccount, wcount and
    lcount on the way.  Return 0 on end of file or error condition.
    Return 1 otherwise. */
 int
 getword (FILE *fp)
 {
   int c;

   if (feof (fp))
     return 0;

   /* Count and skip everything up to the first word constituent.  That
      character itself is counted by the loop below. */
   while ((c = getc (fp)) != EOF)
     {
       if (isword ((unsigned char) c))
         {
           wcount++;
           break;
         }
       COUNT (c);
     }

   /* Count the characters of the word and the character that ends it. */
   for (; c != EOF; c = getc (fp))
     {
       COUNT (c);
       if (!isword ((unsigned char) c))
         break;
     }

   return c != EOF;
 }

 /* Process file FILE: count it, print its counters and add them to the
    totals.  Exits with an error message if FILE cannot be opened. */
 void
 counter (const char *file)
 {
   FILE *fp = fopen (file, "r");

   if (!fp)
     perrf ("cannot open file `%s'", file);

   ccount = wcount = lcount = 0;
   while (getword (fp))
     ;
   fclose (fp);

   report (file, ccount, wcount, lcount);
   total_ccount += ccount;
   total_wcount += wcount;
   total_lcount += lcount;
 }

 int
 main (int argc, char **argv)
 {
   int i;

   if (argc < 2)
     errf ("usage: wc FILE [FILE...]");

   for (i = 1; i < argc; i++)
     counter (argv[i]);

   /* A grand total only makes sense for more than one file. */
   if (argc > 2)
     report ("total", total_ccount, total_wcount, total_lcount);
   return 0;
 }

发现于: http://www.gnu.org/software/cflow/manual/html_node/Source-of-wc-command.html

  • 读取文件
  • 逐个字符遍历,每读到一个字符就递增字符计数器
  • 检查当前字符是否为空格或行尾,若是则递增单词计数器
  • 重复第二步和第三步,直到EOF

这是一个具体的答案,它统计行数(要像提问者链接的C++版本那样扩展为同时统计单词数,是很简单的事)。 此版本使用了缓冲读取。 另一个答案建议先把整个文件读入内存,那样更简单,但下面的做法更接近你的C++示例。

 #include <stdio.h>
 #include <string.h>

 #define BUFSIZE 1024

 /* Count the lines of the file named by argv[1] and print the result.
    The file is read in chunks of at most BUFSIZE-1 characters, so a line
    longer than the buffer is only counted once, when the chunk holding
    its newline is read.  A final line without a terminating newline is
    counted as a line too.
    Returns 0 on success, 1 on wrong usage or if the file cannot be
    opened. */
 int main(int argc, char **argv)
 {
     unsigned long newlines = 0;
     int partial = 0;    /* non-zero while the last chunk had no '\n' */
     char buf[BUFSIZE];
     size_t len;
     FILE *file;

     if (argc != 2)
         return 1;

     file = fopen(argv[1], "r");
     if (file == NULL) {
         perror(argv[1]);
         return 1;
     }

     while (fgets(buf, BUFSIZE, file)) {
         len = strlen(buf);
         /* fgets keeps the newline, so a chunk ends a line exactly when
            its last character is '\n'. */
         partial = !(len > 0 && buf[len - 1] == '\n');
         if (!partial)
             newlines++;
     }

     /* The input ended in the middle of a line: count that line as well. */
     if (partial)
         newlines++;

     fclose(file);

     printf("Number of lines in %s: %lu\n", argv[1], newlines);
     return 0;
 }

可以调整BUFSIZE宏来最大化性能(因为你说你想要最快的方式)。 1024只是一个猜测值。 另一种可能是通过内存映射(mmap)来读取文件,但我没有尝试,因为mmap不属于ANSI C。