删除C程序中的所有注释 – 对此代码的任何可能的改进?

我正在从K&R书中学习C,在第一章中学习练习1.23,我必须编写一个程序,删除用户输入的一些C代码的所有注释。 到目前为止,这是我完成的程序。 我可以对它做出任何改进吗?

/** Tuesday, 10/07/2013 Exercise 1.23 Write a program to remove all comments from a C program. Don't forget to handle quoted strings and character constants properly. C comments don't nest. **/ #include  #define MAX_LENGTH 65536 #define NOT_IN_COMMENT 0 #define SINGLE_COMMENT 1 #define MULTI_COMMENT 2 main() { char code[MAX_LENGTH]; /* Buffer that stores the inputted code */ int size = 0; /* Length of the inputted code */ int loop; /* Integer used for the for loop */ char c; /* Character to input into */ int status = NOT_IN_COMMENT; /* Are we in a comment? What type? */ int in_string = 0; /* Are we inside of a string constant? */ char last_character; /* Value of the last character */ /* Input all code into the buffer until escape sequence pressed */ while ((c = getchar()) != EOF) code[size++] = c; code[size] = '\0'; /* Remove all comments from the code and display results to user */ for (loop = 0; loop < size; loop++) { char current = code[loop]; if (in_string) { if (current == '"') in_string = 0; putchar(current); } else { if (status == NOT_IN_COMMENT) { if (current == '"') { putchar(current); in_string = 1; continue; } if (current == '/' && last_character == '/') status = SINGLE_COMMENT; else if (current == '*' && last_character == '/') status = MULTI_COMMENT; else if (current != '/' || (current == '/' && loop < size-1 && !(code[loop+1] == '/' || code[loop+1] == '*'))) putchar(current); } else if (status == SINGLE_COMMENT) { if (current == '\n') { status = NOT_IN_COMMENT; putchar('\n'); } } else if (status == MULTI_COMMENT) { if (current == '/' && last_character == '*') status = NOT_IN_COMMENT; } } last_character = current; } } 

将你的注释剥离到一个函数(更有用),并使用fgets()一次读取一行,last_character是不明确的(它是指最后一个,还是以前的?),这使用了很少的putchar()调用,只有每行一个printf(可以使用puts),保留你正在做的大部分,

 #include  #include  #define MAX_LENGTH 65536 #define NOT_IN_COMMENT 0 #define SINGLE_COMMENT 1 #define MULTI_COMMENT 2 int status = NOT_IN_COMMENT; /* Are we in a comment? What type? */ int in_string = 0; /* Are we inside of a string constant? */ char* stripcomments(char* stripped,char* code) { int ndx; /* index for code[] */ int ondx; /* index for output[] */ char prevch; /* Value of the previous character */ char ch; /* Character to input into */ /* Remove all comments from the code and display results to user */ for (ndx=ondx=0; ndx < strlen(code); ndx++) { char current = code[ndx]; if (in_string) { if (current == '"') in_string = 0; stripped[ondx++] = current; } else { if (status == NOT_IN_COMMENT) { if (current == '"') { stripped[ondx++] = current; in_string = 1; continue; } if (current == '/' && prevch == '/') status = SINGLE_COMMENT; else if (current == '*' && prevch == '/') status = MULTI_COMMENT; else if (current != '/' || (current == '/' && ndx < strlen(code)-1 && !(code[ndx+1] == '/' || code[ndx+1] == '*'))) stripped[ondx++] = current; } else if (status == SINGLE_COMMENT) { if (current == '\n') { status = NOT_IN_COMMENT; stripped[ondx++] = '\n'; } } else if (status == MULTI_COMMENT) { if (current == '/' && prevch == '*') status = NOT_IN_COMMENT; } } prevch = current; } stripped[ondx] = '\0'; return(stripped); } int main(void) { char code[MAX_LENGTH]; /* Buffer that stores the inputted code */ char stripped[MAX_LENGTH]; while( fgets(code,sizeof(code),stdin) ) { //printf("%s\n",code); //strip comments... stripcomments(stripped,code); if( strlen(stripped) > 0 ) printf("%s",stripped); } } 

我会留给你删除额外的空白行。

当你处理被引用的字符串时,你应该检测转义引号( \" )。例如"\"/* not a comment */\""是一个有效的字符串,但我认为你的代码将从中间删除错误的注释它

如果你想要真正正确,你还应该处理行继续(在下一行继续以\结尾的行)。 为了增加毛羽,你也应该处理三角形。 ??/"是一个转义引用,并且在一行的末尾是一个延续。

代码的样式看起来很不错,虽然main应该更恰当地声明为int main(void)

对我来说很好,干得好!

也许可以通过添加一些评论来改进:)作为一个粗略的指南,为每个条件添加一个。 你正在评论,但在循环内部的多汁部分停了下来。 但代码看起来很可读。

它有用吗? 你测试过吗?

如果我有一个包含转义双引号的字符串,看起来可能会失败……例如"He said, \"Hello, World!\""

 //GH PATEL COLLEGE OF ENGINEERING & TECHNOLOGY. //c program to remove comments from given src.txt file, and write back to dest.txt file. #include  int main() { FILE *src,*dest; char ch,pre,line[100]; int nflag,qflag,index; src=fopen("src.txt","r+"); dest=fopen("dest.txt","w+"); nflag=1; while(!feof(src)) { index=0; for(ch=fgetc(src);ch!=EOF && ch!='\n';) { if(ch=='"'&&pre!='\\') { qflag=0; for(;ch!='\n' && qflag==0;) { line[index++]=ch; pre=ch; ch=fgetc(src); if(ch=='"'&&pre!='\\') { qflag=1; line[index++]=ch; pre=ch; ch=fgetc(src); break; } } } else if(ch=='/') { pre=ch; ch=fgetc(src); if(ch=='/') { for(;fgetc(src)!='\n';); break; } if(ch=='*') { nflag=1; for(ch=fgetc(src);nflag==1;) { if(ch=='*') { pre=ch; ch=fgetc(src); if(ch=='/') { nflag=0; } } else { pre=ch; ch=fgetc(src); } } } } else { line[index++]=ch; pre=ch; ch=fgetc(src); } } line[index]='\0'; if(index>0) { line[index] = '\0'; fprintf(dest,"%s\n",line); fflush(stdin); } } getch(); fclose(src); fclose(dest); return 0; } 

我喜欢这个post在我的项目中包含一个“评论剥离器”,然后再将它交给JSON解析器。 只有我更喜欢FSM方法。 希望我的实现对任何人都是可以理解和有用的:

 #include  #include  void strip(int ch, FILE *stream) { static enum strip_states { STRIP_STATE_PUTC = 0, STRIP_STATE_SINGLE, STRIP_STATE_MULTI, STRIP_STATE_STRING, } state = STRIP_STATE_PUTC; static char _ch = 0; static unsigned char _nestlevel = 0; /* String */ if (state == STRIP_STATE_PUTC && ch == '"') { state = STRIP_STATE_STRING; } else if (state == STRIP_STATE_STRING && ch == '"') { state = STRIP_STATE_PUTC; /* Multiline */ } else if (_ch == '/' && ch == '*') { if (state == STRIP_STATE_PUTC) state = STRIP_STATE_MULTI; else if (state == STRIP_STATE_MULTI) _nestlevel++; } else if (_ch == '*' && ch == '/') { if (state == STRIP_STATE_MULTI && _nestlevel > 0) _nestlevel--; else if (state == STRIP_STATE_MULTI && _nestlevel == 0) state = STRIP_STATE_PUTC; /* Singleline */ } else if (state == STRIP_STATE_PUTC && _ch == '/' && ch == '/') { state = STRIP_STATE_SINGLE; } else if (state == STRIP_STATE_SINGLE && ch == '\n') { state = STRIP_STATE_PUTC; } /* Put character */ if ((state == STRIP_STATE_PUTC && ch != '/') || state == STRIP_STATE_STRING) fputc(ch, stream); _ch = ch; } int main(void) { int ch; while ((ch = fgetc(stdin)) != EOF) strip(ch, stdout); return 0; } 

什么有效:

  • 单行评论"xxx // comment"
  • 正常的多行评论"xxx /* comment\n another comment */ yyy"
  • 嵌套注释"xxx /* comment /* nested comment */ end of comment */ yyy

目前尚未实施和测试:

  • 单行评论背后的多行评论
  • 线延续
  • 逃脱的角色

亲切的问候,杰里

您可以参考下面的简单代码:

 #include  int main(int argc, char **argv) { char code[1000]; char output[1000]; char ch; int i = 0; //store code in array while ((ch = getchar()) != EOF) { code[i++] = ch; } code[i] = '\0'; int index = 0; i = 0; //store removed comment code in output while (code[i] != EOF) { if (code[i] == '/' && code[i + 1] == '/') { //to remove single line comments while (code[i] != '\n') i++; } else if (code[i] == '/' && code[i + 1] == '*') { //to remove multi line comments i = i + 2; while (code[i] != '*' && code[i + 1] != '/') { i++; } i = i + 3; } else { //store the rest of the code in output array output[index++] = code[i++]; } } output[index] = '\0'; printf("%s", output); } 

INPUT:

 #include void main() { printf("Hello"); /*-------------------------------------------- ------------------Ignored by compiler------- -------------------------------------------- */ printf("By"); } 

OUTPUT:

 #include void main() { printf("Hello"); printf("By"); }