在Trie Structure字典中添加单词

我正在尝试创建一个可以插入单词的trie结构，但结构必须完全像这样：

typedef struct tNode_t { struct tNode_t **l; char *w; } tNode;

**l是指向tNode的27个指针数组的指针，这是我不理解的部分。

如果数组是指向tNode的指针，我该如何在其中插入单词？并且由于数组的大小是27（26个小写字母az和终止字符），你怎么知道输入单词的位置取决于起始字母是什么？

我们假设这个词是’cab’。

粗略地说，成员w指向一个角色。列表l中的26个节点用于字母“a”到“z”。

给出’cab’这个词，第一个字母是’c’，所以你会在根节点指针中查找root->l[3] ，其中包含指向以’c’开头的所有单词的指针; 称之为tNode *letter = root->l[3]; 。那么你会找到以’ca’开头的单词的字母 – letter->l[1] ; letter = letter->l[1]; 。然后你会找到以’cab’开头的单词 – letter->l[2] 。此时，您知道您已经到达了搜索单词的末尾，而letter->w != 0告诉您该单词有效并为您提供单词的文本。在树下可能还有其他词（对于’出租车’，’电缆’，’阴谋’等）。

你会被教导一些关于这一点，或给出一些规范。可能还有其他方法可以填写详细信息。

我不清楚使用动态分配的数组比使用tNode *l[27];更好tNode *l[27]; 在结构中（适当调整声明），但这是一个单独的讨论。我轻率地假设列表中的第27个节点没有更深层次的意义。当然，您可以在网上查找这些内容：维基百科往往是与计算机科学相关的无争议问题的合理来源，但我还没有研究链接页面。

l不是数组; 它是一个指针数组的指针，这让我很困惑。

指针变量具有双重生命–Jekyll博士和海德先生 – 可以被视为指针或（随便）数组。

如果你写char *str ， str不是一个字符数组; 它是指向一个字符数组（的第一个字符）的指针。您可以使用str[i]访问字符串中的^第 i ^个字符。同样， char **argv不是字符串数组; 它是一个指向（第一个字符串）字符串数组的指针。您可以使用argv[i]访问主程序的^第 i ^个参数。

在这个trie结构中， l不是tNode *的数组; 它是一个指向trie结构数组（的第一个元素）的指针。但是你可以使用trie->l[i]来访问trie->l指向的列表中的^第 i ^个结构。

工作代码

我之前没有玩过尝试，所以我根据你的数据结构把下面的代码放在一起。无论是否正式正确，您都需要调查; 对于给定的测试用例，下面的代码适用于我，而valgrind为代码提供了一个干净的健康状况（没有泄漏的内存，没有内存滥用）。

该代码强制包含通常在头文件中的内容。就外部世界而言，trie类型（ tNode ）是不透明的。

 #ifndef TRIE_H_INCLUDED #define TRIE_H_INCLUDED typedef struct tNode_t tNode; extern void trie_add_word(tNode *trie, char const *word); extern char const *trie_find_word(tNode *trie, char const *word); extern void trie_print(tNode *trie); extern tNode *trie_new(void); extern void trie_free(tNode *trie); #endif /* TRIE_H_INCLUDED */ /*#include "trie.h"*/ #include  #include  #include  #include  #include  #include  struct tNode_t { struct tNode_t **l; char *w; }; static void db_print(char const *fmt, ...); tNode *trie_new(void) { tNode *trie = malloc(sizeof(tNode)); assert(trie != 0); // Abysmal way to validate memory allocation trie->w = 0; trie->l = (tNode **)calloc(27, sizeof(tNode *)); assert(trie->l != 0); // Abysmal way to validate memory allocation return(trie); } void trie_free(tNode *trie) { assert(trie != 0); assert(trie->l != 0); for (size_t i = 0; i < 27; i++) { if (trie->l[i] != 0) trie_free(trie->l[i]); } free(trie->l); free(trie->w); free(trie); } static void add_word_suffix(tNode *trie, char const *word, char const *suffix) { int c; assert(trie != 0); assert(trie->l != 0); db_print("-->> %s: word [%s], suffix [%s]\n", __func__, word, suffix); while ((c = *suffix++) != '\0') { if (isalpha(c)) { db_print("---- %s: letter %c (index %d)\n", __func__, c, c - 'a' + 1); c = tolower(c) - 'a' + 1; assert(trie->l != 0); if (trie->l[c] == 0) trie->l[c] = trie_new(); db_print("---- %s: recurse: [%s]/[%s]\n", __func__, word, suffix); add_word_suffix(trie->l[c], word, suffix); db_print("<<-- %s\n", __func__); return; } } if (trie->w != 0) { db_print("---- %s: trie already contains word [%s] at [%s]\n", __func__, word, trie->w); return; } trie->w = strdup(word); db_print("<<-- %s: inserted word [%s]\n", __func__, trie->w); } void trie_add_word(tNode *trie, char const *word) { add_word_suffix(trie, word, word); } static char const *find_word_suffix(tNode *trie, char const *word, char const *suffix) { int c; db_print("-->> %s: word [%s] suffix[%s]\n", __func__, word, suffix); for ( ; (c = *suffix) != '\0'; suffix++) { if (isalpha(c)) { db_print("---- %s: letter %c\n", __func__, c); c = tolower(c) - 'a' + 1; if (trie->l[c] == 0) return(0); char const *rv = find_word_suffix(trie->l[c], word, suffix+1); if (rv == 0) { db_print("<<-- %s: missing [%s]/[%s]\n", __func__, word, suffix); return 0; } db_print("<<-- %s: found [%s] for [%s]/[%s]\n", __func__, rv, word, suffix); return rv; } } if (trie->w == 0) { db_print("<<-- %s: missing [%s]/[%s]\n", __func__, word, suffix); return 0; } db_print("<<-- %s: found [%s] for [%s]/[%s]\n", __func__, trie->w, word, suffix); return(trie->w); } char const *trie_find_word(tNode *trie, char const *word) { return find_word_suffix(trie, word, word); } void trie_print(tNode *trie) { assert(trie != 0); assert(trie->l != 0); if (trie->w != 0) printf("%s\n", trie->w); for (size_t i = 0; i < 27; i++) { if (trie->l[i] != 0) trie_print(trie->l[i]); } } static int debug = 0; static void db_print(char const *fmt, ...) { if (debug > 0) { va_list args; va_start(args, fmt); vprintf(fmt, args); va_end(args); } } int main(int argc, char **argv) { tNode *trie = trie_new(); /* Set debugging - and use argv */ if (argc > 1 && argv[argc] == 0) debug = 1; /* First test */ char const *word = "cab"; trie_add_word(trie, word); char const *leaf = trie_find_word(trie, word); printf("Leaf word = %s\n", leaf); trie_free(trie); /* Second, more comprehensive test */ static char const *words[] = { "cabal", "cabbie", "cab", "centre", "cinema", "cold", "culminate", "culmination", "duck", "cabs", "amniocentesis", "amniocentesis", "amniocentesis", "cam", "cab", "cab", "zulu", "alpha", "bravo", "Charlie", "delta", "echo", "foxtrot", "golf", "hotel", "India", "Juliet", "kilo", "Lima", "Mike", "November", "Oscar", "Papa", "Quebec", "Romeo", "Sierra", "Tango", "uMBRelLA", "Victor", "Whisky", "X-ray", "Yankee", "Zulu", "Aquarius", }; size_t num_words = sizeof(words) / sizeof(words[0]); size_t counter = 0; /* First time, add every other word; second time, every word */ for (size_t mod = 2; mod > 0; mod--) { trie = trie_new(); printf("\nTest %zu\n", ++counter); for (size_t i = 0; i < num_words; i++) { if (i % mod == 0) trie_add_word(trie, words[i]); char const *leaf = trie_find_word(trie, words[i]); if (leaf == 0) printf("Word [%s] is missing\n", words[i]); else printf("Word [%s] for [%s]\n", leaf, words[i]); } printf("\nTrie:\n"); trie_print(trie); trie_free(trie); } return(0); }

代码使用assert()进行内存分配检查。它很方便但很糟糕。不要模仿那个。如果您愿意，您可以决定将trie_find_word()和trie_add_word()function组合到一个函数中。通过将参数（任何参数）传递给测试程序，您可以看到相当详细的诊断。

代码使用27（不是在宏或枚举中），但是如果你删除在几个地方出现的+ 1以将'a'转换为1 （它会转换它），它可以在数组中只有26个元素正常工作改为0 ）。

在Trie Structure字典中添加单词

工作代码

C – 将指针数据保存/加载到文件

在C中获取用户输入的最简单方法是什么？

什么和\ b做什么？

在64位拱上投射指向整数问题警告的指针

使用fgets从文件中读取

在C中没有fclose的fopen

可能的堆栈损坏

浮点再次

‘C’分段故障与2darrays

C文件操作问题