如何在C中使用fgetc()统计不重复的单词，并打印每个单词的出现次数

我之前已经问过一个与这个程序有关的问题，但经过大量的研究和苦思冥想之后，我又一次卡住了。

我正在尝试编写一个程序：它接受用户输入并将其存储起来，然后打印出所有不重复的单词以及每个单词出现的次数。

例如

Please enter something: Hello#@Hello# hs,,,he,,whywhyto[then the user hits enter] hello 2 hs 1 he 1 whywhyto 1

上面应该是程序的输出。当然，whywhyto 并不是一个真正的单词，但在这里无关紧要，因为我假设：任何由非字母字符（空格、0-9、#、$、(、@ 等）分隔开的连续字母序列都被视为一个单词。我需要使用二维数组，因为我不被允许使用链表，而且我也还不理解链表。

这就是我到目前为止所做的一切

 #include <ctype.h>
#include <stdio.h>

/*
 * Reads one line from standard input, keeps only its alphabetic characters
 * and echoes them back on a single line.
 *
 * Reading stops at the first newline or at end of input. At most
 * sizeof array letters are stored; any further letters are discarded so the
 * buffer can never be overrun.
 */
int main(void)
{
    char array[64];
    size_t i = 0;
    int input;

    printf("Please enter an input:");

    /* Checking for EOF as well as '\n' prevents an endless loop when the
     * input ends without a trailing newline. */
    while ((input = fgetc(stdin)) != '\n' && input != EOF) {
        if (isalpha(input) && i < sizeof array) {
            array[i++] = (char)input;
        }
    }

    for (size_t j = 0; j < i; j++) {
        printf("%c", array[j]);
    }
    printf("\n");
    return 0;
}

我用 isalpha 来只保留字母，但目前这段代码所做的只是丢弃所有非字母的字符、把字母存储起来再原样打印出来。我完全不知道怎样才能让每个单词在第一次出现时只存储一次，之后再出现时只增加该单词的计数。我只能使用 fgetc()，这对我来说相当困难——我只有 3-4 个月的 C 语言经验。我知道必须使用二维数组，也读过相关资料，但还是没弄明白该如何实现，请给我一点帮助。

这是似乎工作的代码：

 #include <assert.h>
#include <ctype.h>
#include <stdio.h>
#include <string.h>

enum { MAX_WORDS = 64, MAX_WORD_LEN = 20 };

/*
 * Reads text from standard input until end of input, splits it into words
 * (maximal runs of alphabetic characters) and prints how often each distinct
 * word occurred, in order of first appearance.
 *
 * Limits: at most MAX_WORDS distinct words, each shorter than MAX_WORD_LEN
 * characters. When either limit is exceeded a diagnostic is written to
 * stderr, reading stops, and the counts gathered so far are printed.
 */
int main(void)
{
    char words[MAX_WORDS][MAX_WORD_LEN];
    int count[MAX_WORDS] = { 0 };
    int w = 0;                  /* number of distinct words stored */
    char word[MAX_WORD_LEN];    /* word currently being assembled */
    int c;
    int l = 0;                  /* length of the word assembled so far */

    /* EOF is treated like a delimiter for one final pass through the loop
     * body, so a word that ends exactly at end of input is still counted. */
    do {
        c = getchar();
        if (c != EOF && isalpha(c)) {
            if (l >= MAX_WORD_LEN - 1) {
                /* word is not NUL-terminated yet: "%.*s" (precision) limits
                 * the read to the l valid characters, whereas "%*s" would
                 * only set a minimum width and read past them. */
                fprintf(stderr, "Word too long: %.*s%c...\n", l, word, c);
                break;
            }
            word[l++] = (char)c;
        } else if (l > 0) {
            word[l] = '\0';
            printf("Found word <<%s>>\n", word);
            assert(strlen(word) < MAX_WORD_LEN);

            int found = 0;
            for (int i = 0; i < w; i++) {
                if (strcmp(word, words[i]) == 0) {
                    count[i]++;
                    found = 1;
                    break;
                }
            }
            if (!found) {
                if (w >= MAX_WORDS) {
                    fprintf(stderr, "Too many distinct words (%s)\n", word);
                    break;
                }
                strcpy(words[w], word);
                count[w++] = 1;
            }
            l = 0;
        }
    } while (c != EOF);

    for (int i = 0; i < w; i++) {
        printf("%3d: %s\n", count[i], words[i]);
    }
    return 0;
}

样本输出：

 $ ./wordfreq <<< "I think, therefore I am, I think, or maybe I do not think after all, and therefore I am not." Found word <> Found word <> Found word <> Found word <> Found word <> Found word <> Found word <> Found word <> Found word <> Found word <> Found word <> Found word <> Found word <> Found word <> Found word <> Found word <> Found word <> Found word <> Found word <> Found word <> 5: I 3: think 2: therefore 2: am 1: or 1: maybe 1: do 2: not 1: after 1: all 1: and $ ./wordfreq <<< "I think thereforeIamIthinkormaybeI do not think after all, and therefore I am not." Found word <> Found word <> Word too long: thereforeIamIthinkor... 1: I 1: think $ ./wordfreq <<< "abcdefghijklmnopqrstu vwxyz > ABCDEFGHIJKLMNOPQRSTU VWXYZ > aa ab ac ad ae af ag ah ai aj ak al am > an ao ap aq ar as at au av aw ax ay az > " Found word <> Found word <> Found word <> Found word <> Found word <> Found word <> Found word <> Found word <> Found word <> Found word <> Found word <> Found word <> Found word <> Found word <> Found word <> Found word <> Found word <> Found word <> Found word <> Found word <> Found word <> Found word <> Found word <> Found word <> Found word <> Found word <> Found word <> Found word <> Found word <> Found word <> Found word <> Found word <> Found word <> Found word <> Found word <> Found word <> Found word <> Found word <> Found word <> Found word <> Found word <> Found word <
> Found word <> Found word <> Found word <> Found word <> Found word <> Found word <> Found word <> Found word <> Found word <> Found word <> Found word <> Found word <> Found word <> Found word <> Found word <> Found word <> Found word <> Found word <> Found word <> Found word <> Found word <> Found word <> Found word <> Too many distinct words (am) 1: a 1: b 1: c 1: d 1: e 1: f 1: g 1: h 1: i 1: j 1: k 1: l 1: m 1: n 1: o 1: p 1: q 1: r 1: s 1: t 1: u 1: v 1: w 1: x 1: y 1: z 1: A 1: B 1: C 1: D 1: E 1: F 1: G 1: H 1: I 1: J 1: K 1: L 1: M 1: N 1: O 1: P 1: Q 1: R 1: S 1: T 1: U 1: V 1: W 1: X 1: Y 1: Z 1: aa 1: ab 1: ac 1: ad 1: ae 1: af 1: ag 1: ah 1: ai 1: aj 1: ak 1: al $

对“单词太长”和“太多单词”的测试有助于让我放心，代码是合理的。设计这样的测试是很好的做法。

我不确定这是不是家庭作业，所以没有替你把所有事情都做完，不过我顺便整理了一下你的代码。另外，如果你事先不知道用户会输入多少个单词，就需要使用动态数据结构，例如链表。

 #include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

typedef struct linkedlist linkedlist;

/* One node per distinct word. */
struct linkedlist {
    char *word;         /* heap-allocated, NUL-terminated word */
    int count;          /* number of occurrences seen so far */
    linkedlist *next;
};

/* Declared before main so the calls below are checked against a prototype. */
void add_word(char *word, linkedlist **ll);
void destroy_list(linkedlist **ll);

enum { WORD_BUF_SIZE = 64 };

/*
 * Copies the len letters in buf into a freshly allocated string and hands it
 * to add_word, which is expected to take ownership of it.
 * Does nothing for an empty word. Returns 0 on success, -1 if malloc fails.
 */
static int flush_word(const char *buf, size_t len, linkedlist **ll)
{
    if (len == 0) {
        return 0;       /* consecutive delimiters: nothing to record */
    }
    char *word = malloc(len + 1);
    if (word == NULL) {
        return -1;
    }
    memcpy(word, buf, len);
    word[len] = '\0';
    add_word(word, ll);
    return 0;
}

/*
 * Reads one line from standard input, splits it into words (maximal runs of
 * alphabetic characters), records each word in a linked list and prints every
 * stored word with its count.
 */
int main(void)
{
    /* Know your bounds: a word longer than WORD_BUF_SIZE - 1 letters is
     * truncated; its excess letters are dropped. */
    char array[WORD_BUF_SIZE];
    size_t i = 0;
    int input;
    linkedlist *head = NULL;

    printf("Please enter an input:");
    while ((input = fgetc(stdin)) != '\n' && input != EOF) {
        if (isalpha(input)) {
            if (i < sizeof array - 1) {
                array[i++] = (char)input;
            }
        } else {
            if (flush_word(array, i, &head) != 0) {
                fprintf(stderr, "out of memory\n");
                destroy_list(&head);
                return EXIT_FAILURE;
            }
            i = 0;      /* restart at index 0 for the next word */
        }
    }
    /* The line may end in the middle of a word; record that word too. */
    if (flush_word(array, i, &head) != 0) {
        fprintf(stderr, "out of memory\n");
        destroy_list(&head);
        return EXIT_FAILURE;
    }

    /* print out final results */
    for (linkedlist *temp = head; temp != NULL; temp = temp->next) {
        printf("%s %d ", temp->word, temp->count);
    }

    destroy_list(&head);
    return 0;
}

/*
 * Adds word to the end of the list if it does not exist yet;
 * increments the word's count if it already exists.
 */
void add_word(char *word, linkedlist **ll)
{
    /* implement this */
    (void)word;
    (void)ll;
}

/*
 * Frees the resources obtained with malloc (look up how to free/destroy a
 * linked list): every node and the word each node owns.
 */
void destroy_list(linkedlist **ll)
{
    /* implement this */
    (void)ll;
}

对于add_word，你需要的东西是（PSEUDO-CODE）：

 list = *ll
if list == NULL:                          // new list: word becomes the first node
    *ll = malloc(sizeof(linkedlist))
    (*ll)->word = word                    // ll is a pointer to the head pointer, so dereference it first
    (*ll)->count = 1
    (*ll)->next = NULL
    return
while list->next != NULL:
    if strcmp(word, list->word) == 0:     // compare the characters, not the pointers
        free(word)                        // duplicate: the list already owns a copy
        list->count++
        return
    list = list->next
if strcmp(word, list->word) == 0:         // last node in the list
    free(word)
    list->count++
else:                                     // word did not exist, add new node to end of list
    temp = malloc(sizeof(linkedlist))
    temp->word = word
    temp->count = 1
    temp->next = NULL                     // terminate the list at the new tail
    list->next = temp

这也许不是最高效的方式，但你可以在此基础上改进。希望我没有让你更加困惑，祝你好运。

OP仍然有很多工作要做。

诀窍是：1）读取输入；2）识别分隔符；3）将每个单词与整个缓冲区进行比较；4）每个单词只打印一次。

这种方法节省内存，因为它只使用 OP 所提出的 64 个 char 的缓冲区。搜索的时间复杂度为 O(n * n)。

 #include <ctype.h>
#include <stdio.h>
#include <string.h>

/*
 * Helper function to find word occurrences.
 *
 * Scans the first i bytes of array, which holds words separated by '\0'
 * bytes, and counts the words equal to word. The line "word count" is
 * printed only when word points at the last matching occurrence inside
 * array, so each distinct word is printed exactly once however often it is
 * passed in.
 *
 * word   NUL-terminated word to look for.
 * array  buffer of '\0'-separated words; array[i] must be readable and
 *        non-alphabetic (main stores '\0' there) so the skip loop stops.
 * i      number of bytes of array to scan.
 */
void Print_count(const char *word, const char *array, int i)
{
    int count = 0;
    /* Starts as NULL so the comparison below is well defined even when no
     * word in array matches. */
    const char *found = NULL;

    for (int j = 0; j < i; j++) {
        if (isalpha((unsigned char)array[j])) {
            if (strcmp(&array[j], word) == 0) {
                found = &array[j];
                count++;
            }
            /* skip rest of word */
            do {
                j++;
            } while (isalpha((unsigned char)array[j]));
        }
    }

    if (found == word) {
        printf("%s %d\n", word, count);
    }
}

/*
 * Reads one line from standard input into a 64-byte buffer (longer input is
 * truncated), replaces every non-alphabetic byte with '\0' so the buffer
 * becomes a sequence of NUL-terminated words, then prints each distinct word
 * together with its number of occurrences.
 */
int main(void)
{
    char array[64];
    int i = 0;
    int j;
    int input;

    printf("Please enter an input:");

    /* get the input */
    while ((input = fgetc(stdin)) != '\n' && input != EOF) {
        array[i] = (char)input;
        if ((size_t)i + 1 >= sizeof array) {
            break;      /* buffer full: the last slot becomes the terminator */
        }
        i++;
    }
    array[i] = '\0';

    /* change all delimiters to \0 */
    for (j = 0; j < i; j++) {
        if (!isalpha((unsigned char)array[j])) {
            array[j] = '\0';
        }
    }

    for (j = 0; j < i; j++) {
        /* Use the beginning of each word ... */
        if (isalpha((unsigned char)array[j])) {
            Print_count(&array[j], array, i);
            /* skip rest of word */
            do {
                j++;
            } while (isalpha((unsigned char)array[j]));
        }
    }
    return 0;
}

输入Hello#@Hello# hs,,,he,,whywhyto

输出：

 Hello 2 hs 1 he 1 whywhyto 1

如何使用fgetc（）计算唯一的单词数，然后在C中打印计数

双指针在函数中没有得到malloc’d

为什么打印0（零）而没有使用C打印格式“％＃x”的前导“0x”？

fork（）如何工作？

同步写入使用FILE_FLAG_OVERLAPPED打开的文件

为什么在MISRA：2012中需要function原型？

模式组成的数字顺时针方向围绕一个矩形形状（每次减少长度和宽度）

有没有办法显示LLVM自动矢量化的位置？

无法在C中声明大小为400000的数组

xcode不编译c源代码

从剪贴板获取CF_DIBV5的位图