文件中注释的字符数(C编程)

我似乎无法做对,尝试了一切,但..

/*
 * commentChars - interactively count characters belonging to "//" comments.
 *
 * In a loop: prompts for a file name, opens the file, reads it with fgets()
 * into a 256-byte buffer, adds strlen(str) - 2 to commentCount for every
 * adjacent "//" pair found while scanning the buffer, prints the total, and
 * repeats while checker() returns non-zero.  p, dir and checker() are not
 * defined in this snippet.
 *
 * Returns 0 immediately if the file cannot be opened; the normal exit path
 * falls off the end of the function without returning a value.
 *
 * NOTE(review): known problems, kept as posted because the discussion that
 * follows refers to them:
 *   - while(!feof(fp)) does not check the result of fgets(), so a failed
 *     read still processes the previous buffer contents;
 *   - the inner loop runs to i <= sizeof str and reads str[i+1], i.e. it
 *     looks at bytes that are not part of the string just read and steps
 *     past the end of the array;
 *   - the inner "int i" shadows the outer i that holds checker()'s result;
 *   - gets() cannot bound its input, and fflush(stdin) is undefined by the
 *     C standard.
 */
int commentChars() { char str[256], fileName[256]; FILE *fp; int i; do{ long commentCount=0; fflush(stdin); printf("%s\nEnter the name of the file in %s/", p, dir); gets(fileName); if(!(fp=fopen(fileName, "r"))) { printf("Error! File not found, try again"); return 0; } while(!feof(fp)) { fgets(str,sizeof str,fp); for(int i=0;i<=sizeof str;i++) { if(str[i] == '/' && str[i+1] == '/') { commentCount += (strlen(str)-2); } } } fclose(fp); printf("All the chars, contained in a comment: %ld\n", commentCount); puts(p); printf("Do you want to search for another file?: "); i=checker(); }while(i);} 

结果是“All the chars, contained in a comment: 0”,即使文件里确实有注释。我的第二个问题是:类似地,我该如何统计 /* */ 形式的注释中的字符数?这对我来说似乎是一项不可能完成的任务。

我认为你最好使用正则表达式。它们看起来很可怕,但对于这类问题,其实并没有那么糟。你也可以做一些正则表达式高尔夫(regex golf)练习来熟悉它们 ;-)

我会按如下方式处理:

  • 构建一个捕获注释的正则表达式
  • 扫描您的文件
  • 计算匹配中的字符数

借助一些现成的正则表达式代码和一些关于匹配 C 注释的资料,我拼凑出了下面这段程序,它应该可以统计块注释 /* */ 的所有字节数(包括分隔符)。我只在 OS X 上测试过。剩下的部分我想你可以自己处理吧?

 #include <regex.h>
#include <stdio.h>
#include <stdlib.h>

#define MAX_ERROR_MSG 0x1000

/*
 * POSIX extended regular expression matching one C block comment, including
 * its delimiters.  It is written without the non-greedy "*?" operator (which
 * POSIX ERE does not define) so that it needs no platform-specific flags:
 *   "/" "*"                      opening delimiter
 *   ( [^*] | "*"+ [^*"/"] )*     body: anything that does not form a closing pair
 *   "*"+ "/"                     closing delimiter
 */
static const char comment_regex[] = "/\\*([^*]|\\*+[^*/])*\\*+/";

/*
 * Compile regex_text into *r as a POSIX extended regular expression.
 *
 * REG_NEWLINE is deliberately not set: with it, a negated bracket expression
 * such as [^*] would refuse to match a newline and multi-line comments would
 * not be found.
 *
 * Returns 0 on success.  On failure prints the regerror() message and
 * returns 1; *r must not be used or freed in that case.
 */
int compile_regex(regex_t *r, const char *regex_text)
{
    int status = regcomp(r, regex_text, REG_EXTENDED);

    if (status != 0) {
        char error_message[MAX_ERROR_MSG];

        regerror(status, r, error_message, MAX_ERROR_MSG);
        printf("Regex error compiling '%s': %s\n", regex_text, error_message);
        return 1;
    }
    return 0;
}

/*
 * Find every non-overlapping match of r in the NUL-terminated string
 * to_match, print each match and its length, and add the length of each
 * match to *nbytes (which the caller initialises).
 *
 * Always returns the non-zero regexec() status that ended the scan
 * (REG_NOMATCH once the input is exhausted).
 */
int match_regex(regex_t *r, const char *to_match, long long *nbytes)
{
    /* Start of the not-yet-searched part of the input. */
    const char *p = to_match;
    /* Only the whole match is needed; sub-expressions are ignored. */
    regmatch_t m[1];

    for (;;) {
        int nomatch = regexec(r, p, 1, m, 0);

        if (nomatch) {
            printf("No more matches.\n");
            return nomatch;
        }

        /* regoff_t differs between platforms; widen before printing. */
        long long length = (long long)(m[0].rm_eo - m[0].rm_so);

        *nbytes += length;
        printf("match length(bytes) : %lld\n", length);
        printf("Match: %.*s\n\n", (int)length, p + m[0].rm_so);

        p += m[0].rm_eo;
        if (length == 0) {
            /* An empty match would be found again forever; step past it. */
            if (*p == '\0') {
                printf("No more matches.\n");
                return REG_NOMATCH;
            }
            p++;
        }
    }
}

/*
 * Program entry point: report the number of bytes in block comments of the
 * file named by argv[1].  Define COMMENT_REGEX_NO_MAIN to leave it out when
 * the functions above are embedded in another program or a test harness.
 */
#ifndef COMMENT_REGEX_NO_MAIN
int main(int argc, char *argv[])
{
    if (argc != 2) {
        fprintf(stderr, "Usage : %s <file>\n", argv[0]);
        return EXIT_FAILURE;
    }

    FILE *input_file = fopen(argv[1], "rb");
    if (input_file == NULL) {
        perror(argv[1]);
        return EXIT_FAILURE;
    }

    /* Determine the file size so the whole file can be read in one go. */
    long file_size = -1;
    if (fseek(input_file, 0, SEEK_END) == 0) {
        file_size = ftell(input_file);
    }
    if (file_size < 0) {
        perror(argv[1]);
        fclose(input_file);
        return EXIT_FAILURE;
    }
    rewind(input_file);

    /* One extra byte: regexec() needs a NUL-terminated string. */
    size_t input_file_size = (size_t)file_size;
    char *file_contents = malloc(input_file_size + 1);
    if (file_contents == NULL) {
        fprintf(stderr, "Out of memory reading %s\n", argv[1]);
        fclose(input_file);
        return EXIT_FAILURE;
    }

    size_t bytes_read = fread(file_contents, 1, input_file_size, input_file);
    fclose(input_file);
    if (bytes_read != input_file_size) {
        fprintf(stderr, "Error reading %s\n", argv[1]);
        free(file_contents);
        return EXIT_FAILURE;
    }
    file_contents[bytes_read] = '\0';

    regex_t r;
    if (compile_regex(&r, comment_regex) != 0) {
        free(file_contents);
        return EXIT_FAILURE;
    }

    long long comment_bytes = 0;
    match_regex(&r, file_contents, &comment_bytes);
    regfree(&r);
    free(file_contents);

    printf("Found %lld bytes in comments\n", comment_bytes);
    return EXIT_SUCCESS;
}
#endif /* COMMENT_REGEX_NO_MAIN */

对代码进行这种基本上微不足道的修改可以解决代码中的几个问题。

  1. 你不应该那样使用 feof()——`while (!feof(file))` 总是错误的。
  2. 您不应该读取不属于刚读取的字符串的数据。

我还重构了你的代码,以便该函数获取文件名,打开,计数和关闭它,并报告它找到的内容。

 #include <stdio.h>
#include <string.h>

/*
 * Revised interface - process a given file name, reporting the number of
 * characters found in "//" comments.
 *
 * The file is read line by line.  For the first "//" on a line, every
 * character after the introducer up to the end of the line is counted; the
 * newline kept by fgets() is included, the "//" itself is not.
 *
 * Limitations: the scan is purely textual, so a "//" inside a string literal
 * is counted too, and a line longer than the 255 bytes that fit in the buffer
 * is processed in separate pieces.
 *
 * Prints the result to stdout, or an error to stderr if the file cannot be
 * opened.
 */
static void commentChars(char const *file)
{
    char str[256];
    long commentCount = 0;
    FILE *fp = fopen(file, "r");

    if (fp == NULL) {
        fprintf(stderr, "Error! File %s not found\n", file);
        return;
    }

    while (fgets(str, sizeof(str), fp) != NULL) {
        size_t len = strlen(str);

        for (size_t i = 0; i + 1 < len; i++) {
            if (str[i] == '/' && str[i + 1] == '/') {
                /* Count only what follows the introducer, not the code before it. */
                commentCount += (long)(len - i - 2);
                break;
            }
        }
    }
    fclose(fp);

    printf("%s: Number of characters contained in comments: %ld\n", file, commentCount);
}

/* Process each file named on the command line, or standard input if none. */
int main(int argc, char **argv)
{
    if (argc == 1) {
        commentChars("/dev/stdin");
    } else {
        for (int i = 1; i < argc; i++) {
            commentChars(argv[i]);
        }
    }
    return 0;
}

在源代码( ccc.c )上运行时,它会产生:

 ccc.c: Number of characters contained in comments: 58 

那条注释其实没有写完(哎呀),但足以说明发生了什么。程序把 fgets() 保留的换行符也算作注释的一部分,而 // 这个起始标记本身不计入。

处理 /* 注释更难。你需要先找到一个斜杠后紧跟星号,然后一直读到下一个星号-斜杠字符对。按字符读取比按行读取更容易实现这一点;至少,你需要能够把字符分析与按行输入交错进行。

当你准备好之后,可以对你的程序做一次折磨测试(torture test)。下面是我用来检查我的注释剥离程序 SCC 的测试文件(SCC 不处理三字符组(trigraph),这是有意为之;如果源代码中包含三字符组,我会先用一个三字符组去除工具处理源代码)。

 /* @(#)File: $RCSfile: scc.test,v $ @(#)Version: $Revision: 1.7 $ @(#)Last changed: $Date: 2013/09/09 14:06:33 $ @(#)Purpose: Test file for program SCC @(#)Author: J Leffler */ /*TABSTOP=4*/ // -- C++ comment /* Multiline C-style comment #ifndef lint static const char sccs[] = "@(#)$Id: scc.test,v 1.7 2013/09/09 14:06:33 jleffler Exp $"; #endif */ /* Multi-line C-style comment with embedded /* in line %C% which should generate a warning if scc is run with the -w option Two comment starts /* embedded /* in line %C% should generate one warning */ /* Comment */ Non-comment /* Comment Again */ Non-Comment Again /* Comment again on the next line */ // A C++ comment with a C-style comment marker /* in the middle This is plain text under C++ (C99) commenting - but comment body otherwise // A C++ comment with a C-style comment end marker */ in the middle The following C-style comment end marker should generate a warning if scc is run with the -w option */ Two of these */ generate */ one warning It is possible to have both warnings on a single line. Eg: */ /* /* */ */ SCC has been trained to handle 'q' single quotes in most of the aberrant forms that can be used. '\0', '\\', '\'', '\\ n' (a valid variant on '\n'), because the backslash followed by newline is elided by the token scanning code in CPP before any other processing occurs. This is a legitimate equivalent to '\n' too: '\ \n', again because the backslash/newline processing occurs early. The non-portable 'ab', '/*', '*/', '//' forms are handled OK too. The following quote should generate a warning from SCC; a compiler would not accept it. ' \n' " */ /* SCC has been trained to know about strings /* */ */"! "\"Double quotes embedded in strings, \\\" too\'!" "And \ newlines in them" "And escaped double quotes at the end of a string\"" aa '\\ n' OK aa "\"" aa "\ \n" This is followed by C++/C99 comment number 1. 
// C++/C99 comment with \ continuation character \ on three source lines (this should not be seen with the -C flag) The C++/C99 comment number 1 has finished. This is followed by C++/C99 comment number 2. /\ /\ C++/C99 comment (this should not be seen with the -C flag) The C++/C99 comment number 2 has finished. This is followed by regular C comment number 1. /\ *\ Regular comment *\ / The regular C comment number 1 has finished. /\ \/ This is not a C++/C99 comment! This is followed by C++/C99 comment number 3. /\ \ \ / But this is a C++/C99 comment! The C++/C99 comment number 3 has finished. /\ \* This is not a C or C++ comment! This is followed by regular C comment number 2. /\ */ This is a regular C comment *\ but this is just a routine continuation *\ and that was not the end either - but this is *\ \ / The regular C comment number 2 has finished. This is followed by regular C comment number 3. /\ \ \ \ * C comment */ The regular C comment number 3 has finished. Note that \u1234 and \U0010FFF0 are legitimate Unicode characters (officially universal character names) that could appear in an id\u0065ntifier, a '\u0065' character constant, or in a "char\u0061cter\ string". Since these are mapped long after comments are eliminated, they cannot affect the interpretation of /* comments */. In particular, none of \u0002A. \U0000002A, \u002F and \U0000002F ever constitute part of a comment delimiter ('*' or '/'). More double quoted string stuff: if (logtable_out) { sprintf(logtable_out, "insert into %s (bld_id, err_operation, err_expected, err_sql_stmt, err_sql_state)" " values (\"%s\", \"%s\", \"%s\", \"", str_logtable, blade, operation, expected); /* watch out for embedded double quotes. */ } /* Non-terminated C-style comment at the end of the file 
 #include <stdio.h>

/*
 * Count the characters that lie inside C comments read from fp.
 *
 * What is counted:
 *   - line comments:  every character after the introducing pair of slashes,
 *     including the terminating newline;
 *   - block comments: every character between the opening and closing
 *     delimiters, excluding the delimiters themselves.
 * Comment markers inside string literals and character constants are ignored.
 * An unterminated comment is counted up to end of file.
 *
 * Limitation: backslash-newline line splicing is not interpreted, so a
 * delimiter split across lines is not recognised.
 *
 * Reads fp to end of file and returns the count.
 */
size_t counter(FILE *fp)
{
    enum {
        none,             /* ordinary code                         */
        in_line_comment,  /* after a double slash, up to newline   */
        in_range_comment, /* between block-comment delimiters      */
        in_string,        /* inside "..."                          */
        in_char_constant  /* inside '...'                          */
    } status = none;
    size_t count = 0;
    int ch;

    while ((ch = fgetc(fp)) != EOF) {
        switch (status) {
        case in_line_comment:
            if (ch == '\n') {
                status = none;
            }
            ++count;
            break;

        case in_range_comment:
            if (ch == '*') {
                int next = fgetc(fp);

                if (next == '/') {
                    status = none;
                    break;
                }
                /* Not a terminator: the star is comment text, re-read next. */
                if (next != EOF) {
                    ungetc(next, fp);
                }
            }
            ++count;
            break;

        case in_string:
            if (ch == '\\') {
                /*
                 * Skip the escaped character whatever it is, so that the
                 * second backslash of an escaped backslash cannot be taken
                 * as escaping the closing quote.
                 */
                (void)fgetc(fp);
            } else if (ch == '"') {
                status = none;
            }
            break;

        case in_char_constant:
            if (ch == '\\') {
                /* Same reasoning as for string literals. */
                (void)fgetc(fp);
            } else if (ch == '\'') {
                status = none;
            }
            break;

        case none:
        default:
            if (ch == '/') {
                int next = fgetc(fp);

                if (next == '/') {
                    status = in_line_comment;
                } else if (next == '*') {
                    status = in_range_comment;
                } else if (next != EOF) {
                    ungetc(next, fp);
                }
            } else if (ch == '"') {
                status = in_string;
            } else if (ch == '\'') {
                status = in_char_constant;
            }
            break;
        }
    }
    return count;
}

/*
 * Program entry point: count comment characters on standard input.  Define
 * COMMENT_COUNTER_NO_MAIN to leave it out when counter() is embedded in
 * another program or a test harness.
 */
#ifndef COMMENT_COUNTER_NO_MAIN
int main(void)
{
    size_t c = counter(stdin);

    printf("%zu\n", c);
    return 0;
}
#endif /* COMMENT_COUNTER_NO_MAIN */