mitm(document) word_list * words = words_and_counts(document) * wp for (wp = words; wp; wp = wp->next) std::cout << wp->count << " " << wp->word << "\n" wp = words while wp words = wp->next delete wp wp = words
Each word_list node has the fields count, word, and next.
Modify your word-scraping version of mitm()
to output to
std-out a count of the number of times each word appears in the given document.
[ . . . ] Lines should be output in order of increasing count;
word_list * words_and_counts(document) return sort_words_by_count( count_words( words_in_document(document)))
words_in_document()
has already been (mostly) done.
sort_words_by_count()
should be straightforward.
count_words()
.
count_words()
seems like a lot of work.
Drop count_words() and redo words_in_document() to count the words too.
word_list * words_and_counts(document) return sort_words_by_count( words_in_document(document))
words_in_document()
creates a word-count list.
sort_words_by_count()
is new.
mitm()
outputs words and counts.
<body
broken body.
ulimit -v 2650
Date: Thu, 30 Oct 2003 20:52:44 (EST)
Subject: Submitted files
From: s0------@monmouth.edu
To: rclayton@monmouth.edu
When I submitted my files today I first accidentally submitted files in the
wrong directory. I then resubmitted with the correct files. I received the
email for the first submission after I received the email for the second
one. I would like to make sure that the second submission was the one that
was officially submitted at the deadline.
Unfortunately, it was not.
void display_results() unsigned MyCnt = 4294967295U; node* NodeCountMax = NULL; node* curNode = head; for i = 0; i < curNode->size; i++ do if (curNode->count < MyCnt) && not curNode->displayed NodeCountMax = curNode MyCnt = curNode->count curNode = curNode->next while curNode if (NodeCountMax) { cout << "whatever..." NodeCountMax->displayed = true // decrease MyCnt = 0 MyCnt = 4294967295U curNode = head
4294967295U
?)
void display_results(node * head) node dummy = { "", head } while dummy.next max = &dummy for n = max->next; n->next; n = n->next if (max->next->count < n->next->count) max = n out << max->count << max->word n = max->next max->next = max->next->next; delete n
while (counter < sitedata.size()) if (sitedata[counter] == '<') { ++counter while (sitedata[counter] == ' ') ++counter while (sitedata[counter] != '>') ++counter } ++counter
counter
can be run off the end of the string.
while ((i = data.find("<", i)) != npos) i = data.find_first_not_of(" ", i + 1) if i == npos, break i = data.find(">", i + 1) if i == npos, break
for int i = 0; document.data[i] != '\0'; i++ if document.data[i] == '<' i++ while (isspace(document.data[i])) i++ if toupper(document.data[i]) == 'B' i++ if toupper(document.data[i]) == 'O' i++ if toupper(document.data[i]) == 'D' i++ if toupper(document.data[i]) == 'Y' i++ while document.data[i] != '>' i++ if document.data[i] == '>' start = i + 1 else i-- else i-- else i-- else i--
if ((data[i] == 'b' || data[i] == 'B') && (data[i+1] == 'o' || data[i+1] == 'O') && (data[i+2] == 'd' || data[i+2] == 'D') && (data[i+3] == 'y' || data[i+3] == 'Y')) { // whatever } // And sometime later... if ((data[i] == '/') && (data[i+1] == 'b' || data[i+1] == 'B') && (data[i+2] == 'o' || data[i+2] == 'O') && (data[i+3] == 'd' || data[i+3] == 'D') && (data[i+4] == 'y' || data[i+4] == 'Y') && (data[i+5] == '>')) { // whatever }
bool has_str(string str, int i, char * word) if str.size() >= i + strlen(word) for j = 0; j < strlen(word); j++ if str[i + j] != word[j] return false return true
or, even better,
str.find(word, i);
item & list:: operator [](unsigned x) if (x < size) && (x >= 0) node* spot = head for (unsigned i = 0; i < x; i++) spot=spot->next return spot->data else cerr << x << " does not exist\n" exit(1);
item & list:: operator [] (unsigned x) node * spot = head while spot and x-- spot = spot->next if spot return spot->data cerr << "bad list index" abort()
for (i = 0; i < lst.size(); i++) if (lst[i] == data) return true return false
This page last modified on 10 November 2003.