在C中拆分未加引号的字符串

我正在编写一个函数,把字符串拆分成字符串数组(char **)。当分隔符是空格时,我只想在引号之外的空格处拆分,引号内的内容保持完整。例如,Hello world "not split" 应该返回:

 Hello
 world
 "not split"

不知何故,该函数将引号内的单词拆分,并且不会在引号之外拆分单词。

 /*
  * Original attempt from the question: split a string on a separator and,
  * when the separator is a space, try to keep double-quoted sections together.
  *
  * NOTE(review): the header names after each #include are missing from this
  * text; presumably <stdio.h>, <stdlib.h> and <string.h> -- confirm.
  */
 #include
 #include
 #include

 /*
  * Counts the '"' characters from s up to the terminating NUL.
  * Returns 1 when that count is odd or zero, and 0 when it is even and
  * non-zero. It looks only at what follows s, not at what precedes it.
  */
 int is_quotes(char *s)
 {
     int i;
     int count;

     i = 0;
     count = 0;
     while (s[i])
     {
         if (s[i] == '"')
             count++;
         i++;
     }
     /* A remainder without any quote is reported the same way as an odd count. */
     if (count == 0)
         count = 1;
     return (count % 2);
 }

 /*
  * Estimates how many pieces ft_strsplit() will produce for s and sep.
  * Leading separators are skipped; the count starts at 1 if anything remains
  * and is incremented once for every separator character visited.
  */
 int count_words(char *s, char sep)
 {
     int check;
     int i;
     int count;

     check = 0;
     /* The quote handling below is enabled only for a space separator. */
     if (sep == ' ')
         check = 1;
     i = 0;
     count = 0;
     while (*s && *s == sep)
         ++s;
     if (*s)
         count = 1;
     while (s[i])
     {
         if (s[i] == sep)
         {
             /*
              * When an even, non-zero number of quotes remains, jump two
              * characters ahead and advance to the next '"' (34) or the end.
              */
             if (!is_quotes(s + i) && check)
             {
                 i += 2;
                 while (s[i] != 34 && s[i])
                     i++;
             }
             /* Consecutive or trailing separators are each counted too. */
             count++;
         }
         i++;
     }
     return (count);
 }

 /*
  * Allocates len + 1 bytes and copies len bytes starting at s + start.
  * Returns the new buffer, or NULL if malloc fails.
  *
  * NOTE(review): only len bytes are written; the final byte is never set,
  * so the result is not NUL-terminated.
  */
 char *ft_strsub(char const *s, unsigned int start, size_t len)
 {
     char *sub;

     sub = malloc(len + 1);
     if (sub)
         memcpy(sub, s + start, len);
     return (sub);
 }

 /*
  * Splits s on c into a NULL-terminated array of newly allocated strings.
  * Returns NULL when s is NULL, c is 0, or count_words() reports 0.
  */
 char **ft_strsplit(char const *s, char c)
 {
     int words;
     char *start;
     char **result;
     int i;

     /*
      * NOTE(review): count_words() dereferences s, so the !s test on the
      * next statement runs too late to protect against a NULL argument.
      */
     words = count_words((char *)s, c);
     if (!s || !c || words == 0)
         return (NULL);
     i = 0;
     /* NOTE(review): the malloc result is used without a NULL check. */
     result = (char **)malloc(sizeof(char *) * (words + 1));
     start = (char *)s;
     while (s[i])
     {
         if (s[i] == c)
         {
             /*
              * Same look-ahead as in count_words(), except that i is moved
              * back by one after reaching the next '"' or the end.
              */
             if (is_quotes((char *)s + i) == 0 && c == ' ')
             {
                 i += 2;
                 while (s[i] != '"' && s[i])
                     i++;
                 i -= 1;
             }
             /* Emit the span [start, s + i) unless it is empty. */
             if (start != (s + i))
                 *(result++) = ft_strsub(start, 0, (s + i) - start);
             start = (char *)(s + i) + 1;
         }
         ++i;
     }
     /* Emit whatever follows the last separator. */
     if (start != (s + i))
         *(result++) = ft_strsub(start, 0, (s + i) - start);
     *result = NULL;
     /*
      * result was advanced once per stored piece; rewinding by words yields
      * the array start only if exactly words pieces were stored.
      */
     return (result - words);
 }

 /*
  * Splits argv[1] on spaces and prints one piece per line.
  * NOTE(review): s is dereferenced without a NULL check although
  * ft_strsplit() can return NULL, and nothing is freed.
  */
 int main(int argc, char **argv)
 {
     if (argc > 1)
     {
         char **s;
         s = ft_strsplit(argv[1], ' ');
         int i = 0;
         while (s[i])
             printf("%s\n", s[i++]);
     }
     return 0;
 }

当我用hello world "hello hello"运行这段代码时,我得到以下内容

 hello world "hello hello" 

试试这个(已修复并精简):

 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>

 /*
  * A token is a half-open span [top, end) inside the caller's string.
  * top == NULL means "no more tokens".
  */
 typedef struct token {
     const char *top;
     const char *end; /* points to the character just after the token */
 } Token;

 /*
  * Extracts the next token from *sp and advances *sp past it.
  *
  * Leading separators are skipped. A '"' inside a token makes the scan jump
  * to the matching closing '"', so separators between the two quotes do not
  * end the token. An unmatched '"' is treated as an ordinary character.
  * The quotes themselves stay part of the token.
  *
  * Returns a token with top == NULL once only separators (or nothing) remain.
  */
 Token getToken(const char **sp, char sep)
 {
     const char *s = *sp;
     Token token = { NULL, NULL };

     while (*s && *s == sep) /* skip leading separators */
         ++s;
     if (!*s) {
         *sp = s;
         return token; /* null token */
     }

     token.top = s;
     while (*s && *s != sep) {
         if (*s == '"') {
             const char *closing = strchr(s + 1, '"');
             if (closing)
                 s = closing; /* jump to the closing quote */
         }
         ++s;
     }
     token.end = s;
     *sp = s;
     return token;
 }

 /* Returns the number of tokens getToken() yields for s and sep. */
 int count_words(const char *s, char sep)
 {
     int count = 0;

     for (Token token = getToken(&s, sep); token.top != NULL;
          token = getToken(&s, sep))
         ++count;
     return count;
 }

 /*
  * Returns a newly allocated, NUL-terminated copy of the token's text,
  * or NULL if allocation fails. The caller frees the result.
  */
 char *ft_strsub(Token token)
 {
     size_t len = (size_t)(token.end - token.top);
     char *sub = malloc(len + 1);

     if (sub) {
         memcpy(sub, token.top, len);
         sub[len] = '\0';
     }
     return sub;
 }

 /*
  * Splits s on sep, keeping double-quoted sections intact.
  *
  * Returns a NULL-terminated array of newly allocated strings; the caller
  * frees every element and then the array. Returns NULL when s is NULL,
  * sep is 0, s contains no token, or an allocation fails (nothing is leaked
  * in that case).
  */
 char **ft_strsplit(const char *s, char sep)
 {
     int words;

     if (!s || !sep || !(words = count_words(s, sep)))
         return NULL;

     char **result = malloc(((size_t)words + 1) * sizeof *result);
     if (!result) {
         perror("malloc");
         return NULL;
     }

     int i = 0;
     Token token = getToken(&s, sep);
     while (token.top != NULL) {
         result[i] = ft_strsub(token);
         if (!result[i]) {
             /* A NULL in the middle would look like the end of the list
              * and leak the rest, so undo everything instead. */
             perror("malloc");
             while (i > 0)
                 free(result[--i]);
             free(result);
             return NULL;
         }
         ++i;
         token = getToken(&s, sep);
     }
     result[i] = NULL;
     return result;
 }

 /* Demo: splits a fixed sample string and prints one token per line. */
 int main(int argc, char **argv)
 {
     (void)argc;
     (void)argv;

     const char *text = "Hello world \"not split\"";
     char **s = ft_strsplit(text, ' ');

     if (!s)
         return 1;
     for (int i = 0; s[i]; ++i) {
         printf("%s\n", s[i]);
         free(s[i]);
     }
     free(s);
     return 0;
 }

支持转义字符处理的版本。

 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>

 /* Escape character: the character following it is taken literally. */
 #define ESCAPE '\\'

 /*
  * A token is a half-open span [top, end) inside the caller's string.
  * top == NULL means "no more tokens".
  */
 typedef struct token {
     const char *top;
     const char *end; /* points to the character just after the token */
 } Token;

 /*
  * Extracts the next token from *sp and advances *sp past it.
  *
  * Leading separators are skipped. Within a token:
  *   - ESCAPE makes the next character literal, so an escaped separator or
  *     quote does not end the token or open a quoted section;
  *   - '"' opens a quoted section that runs to the next unescaped '"';
  *     separators inside it do not end the token. An unmatched '"' is
  *     treated as an ordinary character.
  * Quotes and escape characters stay in the span; see remove_escape().
  *
  * Returns a token with top == NULL once only separators (or nothing) remain.
  */
 Token getToken(const char **sp, char sep)
 {
     const char *s = *sp;
     Token token = { NULL, NULL };

     while (*s && *s == sep) /* skip leading separators */
         ++s;
     if (!*s) {
         *sp = s;
         return token;
     }

     token.top = s;
     while (*s && *s != sep) {
         if (*s == ESCAPE) {
             /* Skip the escaped character too, but never step over the
              * terminating NUL when the escape is the last character. */
             if (s[1])
                 ++s;
         } else if (*s == '"') {
             /* Walk to the closing quote, honoring escapes on the way so
              * that \" stays inside and \\" still closes the section. */
             const char *p = s + 1;
             while (*p && *p != '"') {
                 if (*p == ESCAPE && p[1])
                     ++p;
                 ++p;
             }
             if (*p == '"')
                 s = p;
         }
         ++s;
     }
     token.end = s;
     *sp = s;
     return token;
 }

 /* Returns the number of tokens getToken() yields for s and sep. */
 int count_words(const char *s, char sep)
 {
     int count = 0;

     for (Token token = getToken(&s, sep); token.top != NULL;
          token = getToken(&s, sep))
         ++count;
     return count;
 }

 /*
  * Removes escape characters from s in place and returns s.
  *
  * Each ESCAPE is dropped and the character after it is kept verbatim, so
  * \" becomes " and \\ becomes a single backslash. A lone trailing ESCAPE is
  * dropped. A NULL argument is returned unchanged.
  */
 char *remove_escape(char *s)
 {
     if (!s)
         return NULL;

     char *from = s;
     char *to = s;

     while (*from) {
         if (*from == ESCAPE) {
             ++from; /* drop the escape character itself */
             if (!*from)
                 break; /* lone trailing escape: nothing left to copy */
         }
         *to++ = *from++;
     }
     *to = '\0';
     return s;
 }

 /*
  * Returns a newly allocated, NUL-terminated copy of the token's text,
  * or NULL if allocation fails. The caller frees the result.
  */
 char *ft_strsub(Token token)
 {
     size_t len = (size_t)(token.end - token.top);
     char *sub = malloc(len + 1);

     if (sub) {
         memcpy(sub, token.top, len);
         sub[len] = '\0';
     }
     return sub;
 }

 /*
  * Splits s on sep, keeping double-quoted sections intact and honoring
  * ESCAPE; escape characters are removed from the returned strings.
  *
  * Returns a NULL-terminated array of newly allocated strings; the caller
  * frees every element and then the array. Returns NULL when s is NULL,
  * sep is 0, s contains no token, or an allocation fails (nothing is leaked
  * in that case).
  */
 char **ft_strsplit(const char *s, char sep)
 {
     int words;

     if (!s || !sep || !(words = count_words(s, sep)))
         return NULL;

     char **result = malloc(((size_t)words + 1) * sizeof *result);
     if (!result) {
         perror("malloc");
         return NULL;
     }

     int i = 0;
     Token token = getToken(&s, sep);
     while (token.top != NULL) {
         result[i] = ft_strsub(token);
         if (!result[i]) {
             /* A NULL in the middle would look like the end of the list
              * and leak the rest, so undo everything instead. */
             perror("malloc");
             while (i > 0)
                 free(result[--i]);
             free(result);
             return NULL;
         }
         remove_escape(result[i]);
         ++i;
         token = getToken(&s, sep);
     }
     result[i] = NULL;
     return result;
 }

 /* Prints text, then each token of its space-split on a line of its own. */
 void test(const char *text)
 {
     printf("original:%s\n", text);
     printf("result of split:\n");

     char **s = ft_strsplit(text, ' ');
     if (s) { /* NULL means no tokens or an allocation failure */
         for (int i = 0; s[i]; ++i) {
             printf("%s\n", s[i]);
             free(s[i]);
         }
         free(s);
     }
     puts("");
 }

 int main(int argc, char **argv)
 {
     (void)argc;
     (void)argv;

     test("Hello world \"not split\"");
     test("Hello world \"not \\\" split\""); /* a " inside "..." */
     test("Hello world not\\ split");        /* escaped separator */
     return 0;
 }

结果:

 original:Hello world "not split"
 result of split:
 Hello
 world
 "not split"

 original:Hello world "not \" split"
 result of split:
 Hello
 world
 "not " split"

 original:Hello world not\ split
 result of split:
 Hello
 world
 not split

你需要一个具有两种状态的状态机:在引号内和在引号外。遇到引号时,翻转状态。遇到空格时,如果处于引号外,就把它转换为换行符;如果处于引号内,则保持不变。(你很快就会希望它更精细,例如支持字符串转义等,状态机方法可以扩展到这些情况。)