为什么我的trie泄露数据?

我正在尝试使用trie制作拼写检查器,但是我试图添加到我的trie中的单词似乎没有被放入。泄漏在哪里?

我花了几个小时使用调试器并单步执行代码……

主function:

/** * Implements a spell-checker. */ #include  #include  #include  #include  #include "dictionary.h" #undef calculate #undef getrusage // default dictionary #define DICTIONARY "dictionaries/large" // prototype double calculate(const struct rusage *b, const struct rusage *a); int main(int argc, char *argv[]) { // check for correct number of args if (argc != 2 && argc != 3) { printf("Usage: speller [dictionary] text\n"); return 1; } // structs for timing data struct rusage before, after; // benchmarks double time_load = 0.0, time_check = 0.0, time_size = 0.0, time_unload = 0.0; // determine dictionary to use char* dictionary = (argc == 3) ? argv[1] : DICTIONARY; // load dictionary getrusage(RUSAGE_SELF, &before); bool loaded = load(dictionary); getrusage(RUSAGE_SELF, &after); // abort if dictionary not loaded if (!loaded) { printf("Could not load %s.\n", dictionary); return 1; } // calculate time to load dictionary time_load = calculate(&before, &after); // try to open text char *text = (argc == 3) ? argv[2] : argv[1]; FILE *fp = fopen(text, "r"); if (fp == NULL) { printf("Could not open %s.\n", text); unload(); return 1; } // prepare to report misspellings printf("\nMISSPELLED WORDS\n\n"); // prepare to spell-check int index = 0, misspellings = 0, words = 0; char word[LENGTH+1]; // spell-check each word in text for (int c = fgetc(fp); c != EOF; c = fgetc(fp)) { // allow only alphabetical characters and apostrophes if (isalpha(c) || (c == '\'' && index > 0)) { // append character to word word[index] = c; index++; // ignore alphabetical strings too long to be words if (index > LENGTH) { // consume remainder of alphabetical string while ((c = fgetc(fp)) != EOF && isalpha(c)); // prepare for new word index = 0; } } // ignore words with numbers (like MS Word can) else if (isdigit(c)) { // consume remainder of alphanumeric string while ((c = fgetc(fp)) != EOF && isalnum(c)); // prepare for new word index = 0; } // we must have found a whole word else if (index > 0) { // terminate current word word[index] = '\0'; // update counter words++; // check word's spelling getrusage(RUSAGE_SELF, &before); bool misspelled = !check(word); getrusage(RUSAGE_SELF, &after); // update benchmark time_check += calculate(&before, &after); // print word if misspelled if (misspelled) { printf("%s\n", word); misspellings++; } // prepare for next word index = 0; } } // check whether there was an error if (ferror(fp)) { fclose(fp); printf("Error reading %s.\n", text); unload(); return 1; } // close text fclose(fp); // determine dictionary'size getrusage(RUSAGE_SELF, &before); unsigned int n = size(); getrusage(RUSAGE_SELF, &after); // calculate time to determine dictionary's size time_size = calculate(&before, &after); // unload dictionary getrusage(RUSAGE_SELF, &before); bool unloaded = unload(); getrusage(RUSAGE_SELF, &after); // abort if dictionary not unloaded if (!unloaded) { printf("Could not unload %s.\n", dictionary); return 1; } // calculate time to unload dictionary time_unload = calculate(&before, &after); // report benchmarks printf("\nWORDS MISSPELLED: %d\n", misspellings); printf("WORDS IN DICTIONARY: %d\n", n); printf("WORDS IN TEXT: %d\n", words); printf("TIME IN load: %.2f\n", time_load); printf("TIME IN check: %.2f\n", time_check); printf("TIME IN size: %.2f\n", time_size); printf("TIME IN unload: %.2f\n", time_unload); printf("TIME IN TOTAL: %.2f\n\n", time_load + time_check + time_size + time_unload); // that's all folks return 0; } /** * Returns number of seconds between b and a. */ double calculate(const struct rusage *b, const struct rusage *a) { if (b == NULL || a == NULL) { return 0.0; } else { return ((((a->ru_utime.tv_sec * 1000000 + a->ru_utime.tv_usec) - (b->ru_utime.tv_sec * 1000000 + b->ru_utime.tv_usec)) + ((a->ru_stime.tv_sec * 1000000 + a->ru_stime.tv_usec) - (b->ru_stime.tv_sec * 1000000 + b->ru_stime.tv_usec))) / 1000000.0); } } 

检查单词是否在trie中的函数:

 /** * Implements a dictionary's functionality. */ #include  #include  #include  #include  #include  #include "dictionary.h" int dictionary_size; bool find(char *word, node *next) { word++; if (next == NULL) { return false; } else if (word[0] == '\0') { if (next->is_word == true) { return true; } else { return false; } } else { //go to this nodes chold that is now eual to the first letter of the word if (word[0] == '\'') { return find(word, next->children[26]); } else { return find(word, next->children[word[0] - 'a']); } } } /** * Returns true if word is in dictionary else false. */ bool check(const char *word) { //get word from text and make it malluable char w[47]; memcpy(w, word, strlen(word) + 1); //make sure it's all lower case for (int i = 0; i < strlen(word); i++) { w[i] = tolower(w[i]); } //go to the root child node equal to the first letter of the word if (find(w, root)) { return true; } else { return false; } } 

将单词字典加载到trie中的函数。 (这是我猜的泄漏?):

 struct node *newNode(char *word, node *next) { next = malloc(sizeof(node)); word++; if (word[0] == '\0') { next->is_word = true; return next; } else { node *new_node = NULL; // go to this nodes choild that is now equal to the first letter of the word if (word[0] == '\'') { return next->children[26] = newNode(word, new_node); } else { return next->children[word[0] - 97] = newNode(word, new_node); } } } /** * Loads dictionary into memory. Returns true if successful else false. */ bool load(const char *dictionary) { dictionary_size = 0; FILE *dic = fopen(dictionary, "r"); if (dic == NULL) { return false; } // Initalize root node root = malloc(sizeof(node)); //get word from dictinary int ch; while ((ch = fgetc(dic)) != EOF) { char word[47]; fgets(word, 47, dic); dictionary_size++; // make sure it's all lower case for (int i = 0; i children[word[0] - 'a'] = newNode(word, child_node); } return true; } /** * Returns number of words in dictionary if loaded else 0 if not yet loaded. */ unsigned int size(void) { if (dictionary_size != 0) { return dictionary_size; } return 0; } void free_node(node *next) { // safety including root node if(!next) return; // takes you to end of trie for (int i = 0; i children[i]); } // base case free(next); } /** * Unloads dictionary from memory. Returns true if successful else false. */ bool unload(void) { free_node(root); return true; } 

定义节点结构:

 typedef struct node { bool is_word; struct node *children[27]; } node; node *root; 

此代码使用彻底重写的addWord()函数代替addNode()

 /* SO 3994-9116 */ #include  #include  #include  #include  #include  #include  typedef struct node { bool is_word; struct node *children[27]; } node; static node *root = 0; static int dictionary_size = 0; static void addWord(char *word, node *trie) { assert(islower((unsigned char)word[0]) || word[0] == '\0'); if (word[0] == '\0') trie->is_word = true; else { int code = word[0] - 'a'; if (trie->children[code] == 0) { trie->children[code] = calloc(sizeof(node), 1); if (trie->children[code] == 0) { fprintf(stderr, "Memory allocation failed\n"); exit(1); } } addWord(&word[1], trie->children[code]); } } static bool load(const char *dictionary) { dictionary_size = 0; FILE *dic = fopen(dictionary, "r"); if (dic == NULL) return false; root = calloc(1, sizeof(node)); if (root == 0) { fprintf(stderr, "Memory allocation failed\n"); exit(1); } char word[47]; while (fgets(word, sizeof(word), dic) != 0) { char *pos; if ((pos = strchr(word, '\n')) != NULL) *pos = '\0'; dictionary_size++; for (int i = 0, len = strlen(word); i < len; i++) { if (!isalpha((unsigned char)word[i])) fprintf(stderr, "Non-alphabetic character in '%s'\n", word); else word[i] = tolower((unsigned char)word[i]); } addWord(word, root); } return true; } static void print_subtrie(const char *prefix, char nchar, node *child) { if (child != 0) { int len = strlen(prefix); char buffer[len + 2]; strcpy(buffer, prefix); buffer[len] = nchar; buffer[len+1] = '\0'; if (child->is_word) printf("%s\n", buffer); for (int i = 0; i < 26; i++) print_subtrie(buffer, 'a' + i, child->children[i]); } } static void print_trie(node *top) { for (int i = 0; i < 26; i++) print_subtrie("", 'a' + i, top->children[i]); } static void free_trie(node *trie) { if (trie != 0) { for (int i = 0; i < 27; i++) { if (trie->children[i] != 0) free_trie(trie->children[i]); } free(trie); } } int main(void) { if (load("words.list")) { printf("Nominal dictionary size: %d\n", dictionary_size); print_trie(root); free_trie(root); } return 0; } 

测试文件1:

 aardvark abelone assignation assignment assigned assert assertion 

输出文件1:

 Nominal dictionary size: 7 aardvark abelone assert assertion assignation assigned assignment 

测试文件2:

 Mike Aquarius delta assert zulu assertion alpha aardvark abelone culmination Zulu kilo amniocentesis Oscar Yankee cabal echo Sierra duck Charlie Quebec centre assigned Papa foxtrot uMBRelLA Romeo cab India Juliet bravo amniocentesis hotel amniocentesis Lima Whisky cab culminate cold Xray cab cabs assignation cam Victor golf Tango November cinema cabbie assignment 

输出文件2:

 Nominal dictionary size: 56 a aardvark abelone alpha amniocentesis aquarius as assert assertion assign assignation assigned assignment bravo cab cabal cabbie cabs cam centre charlie cinema cold culminate culmination delta duck echo foxtrot golf hotel i india juliet kilo lima mike november o oscar papa quebec romeo sierra tango umbrella victor whisky xray yankee zulu 

请注意,56的“标称尺寸”太大; 输入中有46个不同的单词,因此输出中有。