对于字符串,查找和替换

查找一些文本并将其替换为C字符串中的新文本可能比预期的要复杂一些。 我正在寻找一种快速且时间复杂度较小的算法。

我该怎么用?

我不禁想知道strstr()实现了什么算法。 鉴于这些是相当标准的算法,strstr()的良好实现完全有可能使用其中之一。

但是,无法保证strstr()实现优化算法,或者从一个平台到另一个平台使用相同的算法。

我在C中找不到我喜欢的搜索/替换实现,所以我在这里展示自己的。 它不使用像strstr(),snprintf(),任意长度的临时缓冲区等。它只需要干草堆缓冲区足够大,以便在替换后保存结果字符串。

// str_replace(haystack, haystacksize, oldneedle, newneedle) -- // Search haystack and replace all occurences of oldneedle with newneedle. // Resulting haystack contains no more than haystacksize characters (including the '\0'). // If haystacksize is too small to make the replacements, do not modify haystack at all. // // RETURN VALUES // str_replace() returns haystack on success and NULL on failure. // Failure means there was not enough room to replace all occurences of oldneedle. // Success is returned otherwise, even if no replacement is made. char *str_replace(char *haystack, size_t haystacksize, const char *oldneedle, const char *newneedle); // ------------------------------------------------------------------ // Implementation of function // ------------------------------------------------------------------ #define SUCCESS (char *)haystack #define FAILURE (void *)NULL static bool locate_forward(char **needle_ptr, char *read_ptr, const char *needle, const char *needle_last); static bool locate_backward(char **needle_ptr, char *read_ptr, const char *needle, const char *needle_last); char *str_replace(char *haystack, size_t haystacksize, const char *oldneedle, const char *newneedle) { size_t oldneedle_len = strlen(oldneedle); size_t newneedle_len = strlen(newneedle); char *oldneedle_ptr; // locates occurences of oldneedle char *read_ptr; // where to read in the haystack char *write_ptr; // where to write in the haystack const char *oldneedle_last = // the last character in oldneedle oldneedle + oldneedle_len - 1; // Case 0: oldneedle is empty if (oldneedle_len == 0) return SUCCESS; // nothing to do; define as success // Case 1: newneedle is not longer than oldneedle if (newneedle_len <= oldneedle_len) { // Pass 1: Perform copy/replace using read_ptr and write_ptr for (oldneedle_ptr = (char *)oldneedle, read_ptr = haystack, write_ptr = haystack; *read_ptr != '\0'; read_ptr++, write_ptr++) { *write_ptr = *read_ptr; bool found = locate_forward(&oldneedle_ptr, read_ptr, oldneedle, oldneedle_last); if (found) { // then perform update write_ptr -= oldneedle_len; memcpy(write_ptr+1, newneedle, newneedle_len); write_ptr += newneedle_len; } } *write_ptr = '\0'; return SUCCESS; } // Case 2: newneedle is longer than oldneedle else { size_t diff_len = // the amount of extra space needed newneedle_len - // to replace oldneedle with newneedle oldneedle_len; // in the expanded haystack // Pass 1: Perform forward scan, updating write_ptr along the way for (oldneedle_ptr = (char *)oldneedle, read_ptr = haystack, write_ptr = haystack; *read_ptr != '\0'; read_ptr++, write_ptr++) { bool found = locate_forward(&oldneedle_ptr, read_ptr, oldneedle, oldneedle_last); if (found) { // then advance write_ptr write_ptr += diff_len; } if (write_ptr >= haystack+haystacksize) return FAILURE; // no more room in haystack } // Pass 2: Walk backwards through haystack, performing copy/replace for (oldneedle_ptr = (char *)oldneedle_last; write_ptr >= haystack; write_ptr--, read_ptr--) { *write_ptr = *read_ptr; bool found = locate_backward(&oldneedle_ptr, read_ptr, oldneedle, oldneedle_last); if (found) { // then perform replacement write_ptr -= diff_len; memcpy(write_ptr, newneedle, newneedle_len); } } return SUCCESS; } } // locate_forward: compare needle_ptr and read_ptr to see if a match occured // needle_ptr is updated as appropriate for the next call // return true if match occured, false otherwise static inline bool locate_forward(char **needle_ptr, char *read_ptr, const char *needle, const char *needle_last) { if (**needle_ptr == *read_ptr) { (*needle_ptr)++; if (*needle_ptr > needle_last) { *needle_ptr = (char *)needle; return true; } } else *needle_ptr = (char *)needle; return false; } // locate_backward: compare needle_ptr and read_ptr to see if a match occured // needle_ptr is updated as appropriate for the next call // return true if match occured, false otherwise static inline bool locate_backward(char **needle_ptr, char *read_ptr, const char *needle, const char *needle_last) { if (**needle_ptr == *read_ptr) { (*needle_ptr)--; if (*needle_ptr < needle) { *needle_ptr = (char *)needle_last; return true; } } else *needle_ptr = (char *)needle_last; return false; } 

用法示例

 #define BUF 30 char *retval1, *retval2; char message[BUF] = "Your name is $USERNAME."; char username[] = "admin"; char username_toolong[] = "System Administrator"; int main() { retval1 = str_replace(message, BUF, "$USERNAME", username_toolong); retval2 = str_replace(message, BUF, "$USERNAME", username); if (!retval1) printf("Not enough room to replace $USERNAME with `%s'\n", username_toolong); if (!retval2) printf("Not enough room to replace $USERNAME with `%s'\n", username); printf("%s\n", message); return 0; } 

产量

没有足够的空间用“系统管理员”替换$ USERNAME
你的名字是管理员。

干杯。

Knuth-Morris-Pratt(经典)或Boyer-Moore(有时更快)?

尝试使用Google搜索“字符串搜索算法”。

使用std::string (来自 ),您只需使用findreplace

编辑:Touché。 这仅适用于C ++。

这对你有好处吗? http://www.daniweb.com/forums/thread51976.html

这是一个很好的代码

 #include  #include  char *replace_str(char *str, char *orig, char *rep) { static char buffer[4096]; char *p; if(!(p = strstr(str, orig))) // Is 'orig' even in 'str'? return str; strncpy(buffer, str, p-str); // Copy characters from 'str' start to 'orig' st$ buffer[p-str] = '\0'; sprintf(buffer+(p-str), "%s%s", rep, p+strlen(orig)); return buffer; } int main(void) { puts(replace_str("Hello, world!", "world", "Miami")); return 0; } 

我的解决方案,基于其他人,但我相信更安全一点:

 #include  #include  #include  #define MAX_SOURCE_SIZE (0x100000) char * searchReplace(char * string, char *toReplace[], char *replacements[], int numReplacements){ int i = 0; char *locOfToRep; char *toRep; char *rep; int lenToRep,lenStr,lenAfterLocRep; static char buffer[MAX_SOURCE_SIZE]; for(i = 0; i < numReplacements; ++i){ toRep = toReplace[i]; rep = replacements[i]; //if str not in the string, exit. if (!(locOfToRep = strstr(string,toRep))){ exit(EXIT_FAILURE); } lenToRep = strlen(toRep); lenStr = strlen(string); lenAfterLocRep = strlen(locOfToRep); //Print the string upto the pointer, then the val, and then the rest of the string. sprintf(buffer, "%.*s%s%s", lenStr-lenAfterLocRep, string,rep,locOfToRep+lenToRep); string = buffer; } return buffer; } int main(){ char * string = "Hello, world!"; int numVals; char *names[2] = {"Hello", "world"}; char *vals[2] = {"Goodbye", "you"}; numVals = 2; string = searchReplace(string, names, vals, numVals); printf("%s\n",string); }