用于在C中解析iCalendar文件的工具集

我需要在 C 语言中解析一个 ics 文件,并打算逐行进行处理。每一行的具体格式可能差别很大,但总体上都遵循同一种标准结构。

以下是我注意到的一些规则:

  • 有一个属性名称
  • 可选参数,每个参数以分号开头
    • 也可以有CSV
    • 可以是用双引号括起来的值,在这种情况下,需要忽略其中的逗号、分号和冒号之类的字符
  • 一个冒号
  • 属性值

这是一个需要解析的示例ics组件:

UID:uid1@example.com
DTSTAMP:19970714T170000Z
ORGANIZER;CN=John Doe:MAILTO:john.doe@example.com
CATEGORIES:Project Report, XYZ, Weekly Meeting
DTSTART:19970714T170000Z
DTEND:19970715T035959Z
SUMMARY:Bastille Day Party

你会注意到像 MAILTO: 这样的内容。只应把第一个冒号当作分隔符来解析,其后出现的所有冒号都属于属性值的一部分。

使用类似 strtok() 的函数似乎过于基础,不足以解决这个问题。

应该使用正则表达式来解决这个问题吗?我查了一下,看到过一个在 C# 中用正则表达式实现的解决方案示例。

你可以像下面这样做:

 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>

 /*
  * Reads "example.ics" line by line and prints each line as "key --> value".
  *
  * The key is everything before the first ':' or ';' on the line; the value
  * is everything after it (so for a line with parameters the "value" still
  * contains the parameters). Lines without either separator are skipped.
  *
  * Returns 0 on success, EXIT_FAILURE if the file cannot be opened.
  */
 int main(void)
 {
     char line[100];
     FILE *ics = fopen("example.ics", "r");

     if (ics == NULL) {
         perror("example.ics");
         return EXIT_FAILURE;
     }

     while (fgets(line, sizeof line, ics) != NULL) {
         /* Cut at the first CR or LF: .ics files normally end lines with
          * CRLF, and a stray '\r' would otherwise stay glued to the value. */
         size_t len = strcspn(line, "\r\n");
         /* No line terminator and a full buffer: the line did not fit. */
         int overlong = (line[len] == '\0' && len == sizeof line - 1);
         char *separator;

         line[len] = '\0';

         if (overlong) {
             /* Discard the rest of the physical line so that its tail is not
              * parsed as if it were a new property on the next iteration. */
             int ch;
             while ((ch = fgetc(ics)) != EOF && ch != '\n') {
                 /* nothing */
             }
         }

         separator = strpbrk(line, ":;");
         if (separator == NULL) {
             continue;
         }
         *separator = '\0';

         /* key = line, value = separator + 1.
          * Maybe you want to strip surrounding white spaces from both. */
         fprintf(stdout, "%s --> %s\n", line, separator + 1);
     }

     fclose(ics);
     return 0;
 }

使用正则表达式就像用火箭筒杀死一只苍蝇。

 // disclaimer : no support
 // code provided as an example of minimal things one can do.
 #include <stddef.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>

 /* One comma-separated value of a line (the text after the first
  * unquoted ':'). Nodes are linked newest-first. */
 struct value {
     struct value *next;
     char *val;
 };

 /* One ";..." segment that follows the name on a line, stored verbatim
  * (e.g. CN=John Doe). Nodes are linked newest-first. */
 struct property {
     struct property *next;
     char *prop;
 };

 /* One parsed line: its name, the ";..." segments and the values.
  * Nodes are linked newest-first, so the last line parsed is the head. */
 struct parameter {
     struct property *props;
     struct value *vals;
     struct parameter *next;
     char *name;
 };

 /* Which part of a line the scanner is currently inside. */
 enum PARSE_STATE { PARAMETER, PROPERTY, VALUE };

 /* Returns a freshly allocated, NUL-terminated copy of text[0..len),
  * or NULL if the allocation fails. */
 static char *ics_copy_span( const char *text, size_t len )
 {
     char *copy = malloc( len + 1 );

     if( copy == NULL )
         return NULL;
     memcpy( copy, text, len );
     copy[len] = '\0';
     return copy;
 }

 /* Releases a whole result list: every node and every string it owns. */
 static void ics_free_all( struct parameter *root )
 {
     while( root ) {
         struct parameter *next_parameter = root->next;

         while( root->props ) {
             struct property *next_property = root->props->next;
             free( root->props->prop );
             free( root->props );
             root->props = next_property;
         }
         while( root->vals ) {
             struct value *next_value = root->vals->next;
             free( root->vals->val );
             free( root->vals );
             root->vals = next_value;
         }
         free( root->name );
         free( root );
         root = next_parameter;
     }
 }

 //format for lines is...
 // PARAMETER[;PARAM_PROPERTY..]:VALUE[,VALUE2..]\n
 /*
  * Parses a NUL-terminated buffer of '\n'- (or "\r\n"-) terminated lines.
  *
  * input is only read, never modified.
  *
  * Returns the head of a list with one struct parameter per line. Every list
  * (lines, props, vals) is built by prepending, so each one is in REVERSE
  * order of appearance in the input. All nodes and strings are heap
  * allocated. Returns NULL if the input produced no entries or if an
  * allocation failed (in which case everything built so far is released).
  *
  * Inside the ";..." segments, text between double quotes is taken literally,
  * so ';' and ':' there do not act as separators. Values are split on every
  * ','. If the input does not end with a newline, the trailing partial item
  * is not stored and a message is written to stderr.
  */
 struct parameter *parse( char *input )
 {
     size_t start = 0;
     size_t end = 0;
     enum PARSE_STATE state = PARAMETER;
     struct parameter *root = NULL;
     int in_quote = 0;

     for( ; input[end]; end++ ) {
         const char c = input[end];

         switch( state ) {
         case PARAMETER:
             if( c == ';' || c == ':' ) {
                 struct parameter *new_parameter = malloc( sizeof *new_parameter );

                 if( new_parameter == NULL )
                     goto out_of_memory;
                 new_parameter->name = ics_copy_span( input + start, end - start );
                 if( new_parameter->name == NULL ) {
                     free( new_parameter );
                     goto out_of_memory;
                 }
                 /* Two separate assignments: props and vals have different
                  * pointer types, so they cannot share a chained one. */
                 new_parameter->props = NULL;
                 new_parameter->vals = NULL;
                 new_parameter->next = root;
                 root = new_parameter;
                 start = end + 1;
                 state = ( c == ';' ) ? PROPERTY : VALUE;
             }
             break;

         case PROPERTY:
             if( c == '"' ) {
                 /* Each double quote opens or closes a quoted section. */
                 in_quote = !in_quote;
                 break;
             }
             if( in_quote )
                 break;
             if( c == ';' || c == ':' ) {
                 struct property *new_property = malloc( sizeof *new_property );

                 if( new_property == NULL )
                     goto out_of_memory;
                 new_property->prop = ics_copy_span( input + start, end - start );
                 if( new_property->prop == NULL ) {
                     free( new_property );
                     goto out_of_memory;
                 }
                 new_property->next = root->props;
                 root->props = new_property;
                 if( c == ':' )
                     state = VALUE;
                 start = end + 1;
             }
             break;

         case VALUE:
             if( c == '\n' || c == ',' ) {
                 size_t len = end - start;
                 struct value *new_value;

                 /* Drop the CR of a CRLF line ending from the value. */
                 if( c == '\n' && len > 0 && input[end - 1] == '\r' )
                     len--;

                 new_value = malloc( sizeof *new_value );
                 if( new_value == NULL )
                     goto out_of_memory;
                 new_value->val = ics_copy_span( input + start, len );
                 if( new_value->val == NULL ) {
                     free( new_value );
                     goto out_of_memory;
                 }
                 new_value->next = root->vals;
                 root->vals = new_value;
                 if( c == '\n' )
                     state = PARAMETER;
                 start = end + 1;
             }
             break;

         default:
             break;
         }
     }

     if( end != start )
         fprintf( stderr, "missing newline at end of input\n" );
     return root;

 out_of_memory:
     ics_free_all( root );
     return NULL;
 }

 /*
  * Prints every entry of the list to stdout, one per output line:
  * the name, then each stored ";..." segment, then the values separated
  * by commas. Items appear in list order (i.e. reversed w.r.t. the input).
  */
 void DumpResult( struct parameter *root )
 {
     for( ; root; root = root->next ) {
         const struct property *prop;
         const struct value *val;

         printf( "%s ", root->name );
         for( prop = root->props; prop; prop = prop->next )
             printf( "; %s ", prop->prop );
         for( val = root->vals; val; val = val->next ) {
             /* Only the first value is introduced by a colon. */
             if( val == root->vals )
                 printf( " : %s ", val->val );
             else
                 printf( ", %s ", val->val );
         }
         printf( "\n" );
     }
 }

下面是使用上述代码的示例。注意:解析得到的各个值的顺序都是反过来的……

 /*
  * Demo driver: parses a small hard-coded iCalendar fragment with parse()
  * and prints the resulting list with DumpResult().
  *
  * Declared as int main(void) and returning 0, since void main is not a
  * standard signature for a hosted C program.
  */
 int main( void )
 {
     char *string = "UID:uid1@example.com\n"
                    "DTSTAMP:19970714T170000Z\n"
                    "ORGANIZER;CN=John Doe;SENT-BY=\"mailto:smith@example.com\":mailto:john.doe@example.com\n"
                    "CATEGORIES:Project Report, XYZ, Weekly Meeting\n"
                    "DTSTART:19970714T170000Z\n"
                    "DTEND:19970715T035959Z\n"
                    "SUMMARY:Bastille Day Party\n";
     struct parameter *thing = parse( string );

     DumpResult( thing );
     return 0;
 }