// Processes one page: calls get_words() on document and, when the result is
// non-null, passes uri and that result to concordance::add_page().
// concordance::print_words() is given std::cout.
// NOTE(review): block delimiters are not visible in this listing, so it is
// unclear whether the print_words() call is also guarded by the if — confirm.
void
mitm(string uri, resource document)
word_info * words = get_words(document)
if (words)
concordance::add_page(uri, words)
concordance::print_words(std::cout)
// Reorders the sort_node range [words_s, words_e) in three passes: drop the
// entries whose word pointer is null, order the rest by word->word, then hand
// the leading run of equal words to sort_by_count().
// words_e is taken by reference and is moved down as null entries are dropped.
// Returns the end of that leading run (one past its last entry).
static sort_node * sort_by_words(
sort_node * words_s, sort_node *& words_e)
// Compress nulls: walking from the back, each entry with a null word is
// overwritten by the current last entry and the range shrinks by one.
for snp = words_e - 1; words_s <= snp; --snp
if (not snp->word)
*snp = *--words_e;
// Sort by word: selection sort, ascending on word->word.
for snp = words_s; snp < words_e; snp++
min = snp
for j = snp + 1; j < words_e; j++
if min->word->word > j->word->word
min = j
// NOTE(review): snp and min are pointers; if this is std::swap it exchanges
// the two local pointers rather than the entries they point at — presumably
// swap(*snp, *min) is meant. Confirm.
swap(snp, min)
// Find leftmost run: stop at the first entry whose word differs from the
// first entry's word, so [words_s, snp) holds entries with equal words.
for snp = words_s; snp < words_e; snp++
if snp->word->word != words_s->word->word
break
sort_by_count(words_s, snp)
return snp
// Writes one line per (word, page) entry to os: the word, the page name, the
// word's count and the page's word count, separated by single spaces.
void
print_pages(std::ostream & os)
// NOTE(review): presumably top_words() yields the array bounded by words_s
// and words_e; how both pointers get initialized is not visible here — confirm.
sort_node * const words_s, * words_e =
top_words(pages_visited)
while true
// sort_by_words() drops entries whose word is null (lowering words_e) and
// returns the end of the leading run of entries that share a word.
sort_node * const words_m
= sort_by_words(words_s, words_e)
// No entries remain once the range is empty.
if (words_s >= words_e) break
for snp = words_s; snp < words_m; snp++
assert(snp->word and snp->page)
os << snp->word->word << " "
<< snp->page->page_name << " "
<< snp->word->cnt << " "
<< snp->page->word_count << "\n"
// Step this entry to its next word; a null next is what sort_by_words()
// tests for when it drops entries.
snp->word = snp->word->next
delete [] words_s
| | c | n | - | + | y |
| empty page | 0 | 0 | 0 | 0 | 12 |
| empty body | 0 | 1 | 0 | 1 | 10 |
| broken body | 0 | 1 | 0 | 0 | 11 |
| spaced body | 0 | 7 | 0 | 2 | 3 |
| tabbed body | 0 | 8 | 0 | 1 | 3 |
| 1000 words | 0 | 5 | 1 | 1 | 5 |
| 50 words | 0 | 8 | 1 | 1 | 2 |
| two words | 0 | 7 | 0 | 2 | 3 |
| redundant words | 0 | 8 | 0 | 2 | 2 |
| repeated words | 0 | 10 | 0 | 1 | 1 |
| changing words | 0 | 10 | 0 | 0 | 2 |
// Finds where the body text begins: returns the index just past the '>' that
// closes the first "<BODY" opening tag (letters matched case-insensitively)
// lying inside totalString[start, end).
//
//   start, end   half-open search window; it is clamped to the string so no
//                index outside totalString is ever read.
//   totalString  the text to search.
//
// Returns the (clamped) end when the window holds no complete "<BODY...>"
// tag, so callers always receive an index inside [start, end].
// As before, any tag name that merely begins with "BODY" is accepted.
long findBodyStart(
  long start, long end, std::string totalString)
{
  const long size = static_cast<long>(totalString.size());
  if (start < 0) start = 0;
  if (end > size) end = size;

  static const char tag[] = "<BODY";
  const long tagLen = 5;

  // A match needs the five tag characters plus at least the closing '>'
  // inside the window, hence pos + tagLen < end.
  for (long pos = start; pos + tagLen < end; pos++) {
    bool matched = true;
    for (long i = 0; matched && i < tagLen; i++) {
      // toupper() requires a value representable as unsigned char.
      const unsigned char ch = static_cast<unsigned char>(totalString[pos + i]);
      matched = (toupper(ch) == tag[i]);
    }
    if (!matched) continue;

    // Skip any attributes up to the closing '>', never leaving the window.
    long close = pos + tagLen;
    while (close < end && totalString[close] != '>') close++;
    if (close < end) return close + 1;

    // No '>' is left in the window, so no later tag can be complete either.
    break;
  }

  return end;
}
// Finds where the body text ends: returns the index of the '<' that opens the
// first "</BODY>" closing tag (letters matched case-insensitively) lying
// completely inside totalString[start, end).
//
//   start, end   half-open search window; it is clamped to the string so no
//                index outside totalString is ever read.
//   totalString  the text to search.
//
// Returns the (clamped) end when the window holds no complete "</BODY>" tag,
// which treats everything up to the end of the window as body text.
long findBodyEnd(
  long start, long end, std::string totalString)
{
  const long size = static_cast<long>(totalString.size());
  if (start < 0) start = 0;
  if (end > size) end = size;

  static const char tag[] = "</BODY>";
  const long tagLen = 7;

  // The whole tag has to fit inside the window.
  for (long pos = start; pos + tagLen <= end; pos++) {
    bool matched = true;
    for (long i = 0; matched && i < tagLen; i++) {
      // toupper() requires a value representable as unsigned char.
      const unsigned char ch = static_cast<unsigned char>(totalString[pos + i]);
      matched = (toupper(ch) == tag[i]);
    }
    if (matched) return pos;
  }

  return end;
}
void StrLinkedList::Sort()
LinkedNode
* Current = Head_Node,
* SmallestNode,
* Next
int intsmallestcount
std::string strsmallestsword
while Current != NULL
SmallestNode = Current
intsmallestcount = Current->count
strsmallestsword = Current->word
Next = Current->Next_Node
while Next != NULL
if Next->count < intsmallestcount
SmallestNode = Next
intsmallestcount = Next->count
strsmallestsword = Next->word
Next = Next->Next_Node
SmallestNode->count = Current->count
SmallestNode->word = Current->word
Current->count = intsmallestcount
Current->word = strsmallestsword
Current = Current->Next_Node
void AddressLinkedList::Sort()
Node * Current = Head,
* GreatestNode,
* Next;
int intGreatestcount
while Current != NULL
GreatestNode = Current
intGreatestcount = Current->Count
Next = Current->Next
while Next != NULL
if Next->Count > intGreatestcount
GreatestNode = Next
intGreatestcount = Next->Count
Next = Next->Next
GreatestNode = Current
Current->Count = intGreatestcount
Current = Current->Next
// Sketch of a selection-sort pass that exchanges whole nodes instead of
// individual fields.
// NOTE(review): this is a fragment — the enclosing function and the
// declarations are not shown, identifier spelling varies (Current/current,
// Next_Node/next) and the last call has no comma between its arguments.
// Confirm the intent before reusing it.
while Current != NULL
*smallest = *Current
Next = Current->Next_Node
// Scan the remainder of the list for a node with a smaller count.
while Next != NULL
if Next->count < smallest->count
smallest = *Next
Next = Next->Next_Node
// Exchange the complete node contents, then exchange the link fields;
// presumably the second exchange restores the original links — confirm.
swap(*smallest, *current)
swap(smallest->next current->next)
I've written the following code
ostringstream message
message << *this
document.size = message.str().size()
document.data = new char [document.size]
document.data = message.str().c_str()
and gcc is complaining about
LList.cc:55: warning: invalid conversion from const char* to char*
While this is just a warning and the code seems to work, is there a better way to do this?
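document.data = new char [document.size]
document.data = message.str().c_str()
has two problems; here's the first. The second assignment copies a pointer, not characters: it overwrites the pointer returned by new, so the freshly allocated array is never filled and can never be freed.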
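document.data = new char [document.size]
document.data = message.str().c_str()
has two problems; here's the second. message.str() returns a temporary string, and the pointer c_str() yields refers to storage owned by that temporary; the temporary is destroyed at the end of the statement, leaving document.data dangling.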
This page last modified on 5 December 2003.