Exercise 6.4 - Words and Frequency

Question

Write a program that prints the distinct words in its input sorted into decreasing order of frequency of occurrence. Precede each word by its count.

/*
 * Write a program that prints the distinct words in its input sorted into
 * decreasing order of frequency of occurrence. Precede each word by its count.
 */

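/*
 * First build a binary tree keyed by word that records each word's count,
 * just like K&R's tnode.
 * Then traverse that tree and build a second tree keyed by count, in which
 * each node holds the list of words that occur that many times.
 * Finally print the second tree with an in-order traversal.
 */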

#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>
#include <string.h>
#include <limits.h>

/* Size of the word buffer that main declares and passes to getword as lim. */
#define MAXWORD 1000

/* Node of the binary search tree built by addtree: one node per distinct word. */
struct tnode
{
    char *word;             /* copy of the word, made with mstrdup in addtree */
    int count;              /* number of times addtree has seen this word */
    struct tnode *left;     /* words that strcmp orders before this one */
    struct tnode *right;    /* words that strcmp orders after this one */
};

/* Node of the second tree built by addnumtree: one node per distinct count. */
struct bynumbernode
{
    int number;                 /* the occurrence count this node stands for */
    struct words *wordlist;     /* list of the words that occurred number times */
    struct bynumbernode *left;  /* addnumtree puts larger counts here */
    struct bynumbernode *right; /* addnumtree puts smaller counts here */
};

/* Element of the singly linked word list hanging off a bynumbernode. */
struct words
{
    char *word;                 /* copy of the word, made with mstrdup in addwordtolist */
    struct words *nextword;     /* next element, or NULL at the end of the list */
};

/* Forward declarations for the functions defined further down in this file. */
struct tnode *addtree(struct tnode *p, char *w);
struct bynumbernode *addnumtree(struct bynumbernode *n, int i, char *w);
struct words *addwordtolist(struct words *list, char *w);
void printwords(const struct words *w, const int count);

struct bynumbernode *traverse(const struct tnode *p, struct bynumbernode *q);
void treeprint(const struct bynumbernode *p);
/* The word reader is named getword; no function called mgetword is defined. */
int getword(char *word, int lim);

/* talloc: allocate storage for one tnode.
 * Returns NULL when malloc fails; the fields are left uninitialised. */
struct tnode *talloc(void){
    return malloc(sizeof(struct tnode));
}

/* bynumbernodealloc: allocate storage for one bynumbernode.
 * Returns NULL when malloc fails; the fields are left uninitialised. */
struct bynumbernode *bynumbernodealloc(void){
    return malloc(sizeof(struct bynumbernode));
}

/* wordsalloc: allocate storage for one word-list element.
 * Returns NULL when malloc fails; the fields are left uninitialised. */
struct words *wordsalloc(void){
    return malloc(sizeof(struct words));
}



#define BUFSIZE 100

/*
 * Pushback stack shared by getch and ungetch.  The slots are int rather than
 * char so that every value getchar can return -- EOF and bytes above 0x7F
 * included -- comes back from getch unchanged.
 */
int buf[BUFSIZE];
int bufp=0;     /* index of the next free slot in buf */

/* getch: return the most recently pushed-back character if there is one,
 * otherwise read the next character from standard input. */
int getch(void){
    return (bufp > 0) ? buf[--bufp] : getchar();
}

/* ungetch: push c back so that the next getch returns it.  When the stack
 * is already full the character is dropped and a message is printed. */
void ungetch(int c){
    if(bufp >= BUFSIZE){
        printf("ungetch: too many characters\n");
    }
    else {
        buf[bufp++] = c;
    }
}

/* mstrdup: return a freshly malloc'ed copy of the string s,
 * or NULL if the allocation fails.  The caller owns the copy. */
char *mstrdup(char *s){
    size_t size = strlen(s) + 1;    /* +1 for the terminating '\0' */
    char *copy = malloc(size);

    if(copy == NULL){
        return NULL;
    }
    memcpy(copy, s, size);
    return copy;
}



/*
 * getword: read the next word or single character from the input into word.
 *
 * Leading white space and the characters _ / # * " are skipped.  If the next
 * character is a letter, it and the letters and digits that follow are
 * stored as a word; otherwise that single character is stored.  At most
 * lim - 1 characters are stored and word is always '\0'-terminated, so a
 * longer word is split and its remainder is returned by the following calls.
 *
 * Returns the first character of the word, the single non-letter character,
 * or EOF at end of input.  Returns EOF without reading when lim is not
 * positive, because nothing (not even the terminator) could be stored.
 */
int getword(char *word, int lim){
    int c, getch(void);
    void ungetch(int);
    char *w = word;
    char *last;

    if(lim <= 0){
        return EOF;
    }
    last = word + lim - 1;      /* slot reserved for the terminating '\0' */

    /* Characters are kept in an int and passed to <ctype.h> as unsigned char
     * so that EOF stays distinct from byte 0xFF and no negative value other
     * than EOF reaches the classification functions. */
    do {
        c = getch();
    } while(c != EOF && (isspace((unsigned char)c) || c == '_' || c == '/'
                         || c == '#' || c == '*' || c == '"'));

    if(c == EOF || !isalpha((unsigned char)c)){
        if(c != EOF && w < last){
            *w++ = (char)c;
        }
        *w = '\0';
        return c;
    }

    if(w < last){
        *w++ = (char)c;
    }
    while(w < last){
        c = getch();
        if(c == EOF || !isalnum((unsigned char)c)){
            /* Give the delimiter back; EOF is not pushed because the
             * stream reports it again on the next read. */
            if(c != EOF){
                ungetch(c);
            }
            break;
        }
        *w++ = (char)c;
    }
    *w = '\0';
    return word[0];
}
/*
 * addtree: add w to the word tree rooted at p and return the root.
 *
 * If w is already in the tree its count is incremented; otherwise a new node
 * holding a copy of w with count 1 is attached at the position given by
 * strcmp ordering.  Pass NULL as p to start a new tree.
 * If memory cannot be allocated, a message is written to stderr and the
 * program exits with EXIT_FAILURE.
 */
struct tnode *addtree(struct tnode *p, char *w)
{
    int cond;

    if (p == NULL){ /* new word */
        p = talloc(); // make new node
        if (p == NULL || (p->word = mstrdup(w)) == NULL){
            fprintf(stderr, "addtree: out of memory\n");
            exit(EXIT_FAILURE);
        }
        p->count = 1;
        p->left = p->right = NULL;
    }
    else if ((cond = strcmp(w, p->word)) == 0){
        p->count++;     /* repeated word */
    }
    else if(cond < 0){
        p->left = addtree(p->left, w);
    }
    else{
        p->right = addtree(p->right, w);
    }
    return p;
}
// treeprint: walk the count tree rooted at p in order (left subtree, node,
// right subtree) and print each node's word list with printwords.
void treeprint(const struct bynumbernode *p){
    /* Recurse only into the left subtree; the right one is handled by
     * moving p along, which visits the nodes in the same order. */
    for( ; p != NULL; p = p->right){
        treeprint(p->left);
        printwords(p->wordlist, p->number);
    }
}

/* printwords: print one output line of the form "count->w1, w2, ..." for the
 * list starting at w.  An empty list produces just the newline. */
void printwords(const struct words* w, const int count)
{
    const struct words *cur = w;

    if (cur != NULL){
        /* The first word carries the count prefix, the rest a comma. */
        printf("%d->%s", count, cur->word);
        for (cur = cur->nextword; cur != NULL; cur = cur->nextword){
            printf(", %s",  cur->word);
        }
    }
    printf("\n");
}


/*
 * addwordtolist: append a copy of w to the end of list and return the head.
 *
 * Pass NULL as list to start a new list; the new element then becomes the
 * head.  The list is walked with a loop rather than recursion, so the stack
 * depth does not grow with the length of the list.
 * If memory cannot be allocated, a message is written to stderr and the
 * program exits with EXIT_FAILURE.
 */
struct words *addwordtolist(struct words* list, char *w){
    struct words *node = wordsalloc();
    struct words *tail;

    if(node == NULL || (node->word = mstrdup(w)) == NULL){
        fprintf(stderr, "addwordtolist: out of memory\n");
        exit(EXIT_FAILURE);
    }
    node->nextword = NULL;

    if(list == NULL){
        return node;
    }
    for(tail = list; tail->nextword != NULL; tail = tail->nextword){
        ;   /* find the last element */
    }
    tail->nextword = node;
    return list;
}


/*
 * addnumtree: record that word w occurs i times in the count tree rooted at
 * n and return the root.
 *
 * A node for count i is created if none exists; otherwise w is appended to
 * that node's word list.  Larger counts are stored in the left subtree, so
 * an in-order walk visits the counts in decreasing order.
 * Pass NULL as n to start a new tree.  If the node cannot be allocated, a
 * message is written to stderr and the program exits with EXIT_FAILURE.
 */
struct bynumbernode *addnumtree(struct bynumbernode *n, int i, char *w){
    if (n == NULL){
        n = bynumbernodealloc();
        if (n == NULL){
            fprintf(stderr, "addnumtree: out of memory\n");
            exit(EXIT_FAILURE);
        }
        n->number = i;
        /* malloc does not clear memory: the child links must be set here,
         * or later insertions and treeprint would follow garbage pointers. */
        n->left = n->right = NULL;
        n->wordlist = addwordtolist(NULL, w);
    }
    else if (n->number == i){
        n->wordlist = addwordtolist(n->wordlist, w);
    }
    else if (n->number < i){
        n->left = addnumtree(n->left, i, w);
    }
    else{
        n->right = addnumtree(n->right, i, w);
    }
    return n;
}

/* traverse: visit the word tree rooted at p in order and add every word,
 * with its count, to the count tree q.  Returns the root of the count tree. */
struct bynumbernode *traverse(const struct tnode *p, struct bynumbernode *q){
    /* Recurse into the left subtree only; stepping p to the right child
     * covers the right subtree in the same left-node-right order. */
    for( ; p != NULL; p = p->right){
        q = traverse(p->left, q);
        q = addnumtree(q, p->count, p->word);
    }
    return q;
}

/*
 * main: count the words read from standard input and print them grouped by
 * count, highest count first.
 *
 * Only tokens that start with a letter are counted; the other single
 * characters that getword returns are ignored.  Returns 0.
 */
int main(void){
    struct tnode *root = NULL;          /* words with their counts */
    struct bynumbernode *nroot = NULL;  /* the same words regrouped by count */
    char word[MAXWORD];

    while(getword(word, MAXWORD) != EOF){
        /* cast: <ctype.h> functions require an unsigned char value or EOF */
        if(isalpha((unsigned char)word[0])){
            root = addtree(root, word);
        }
    }

    nroot = traverse(root, nroot);

    printf("Words by frequency:\n");

    treeprint(nroot);
    return 0;
}





Explanation

ab
ab
bc
cd
ef
gh
ab
x
Words by frequency:
3->ab
1->bc, cd, ef, gh, x