bool table[N] = { false } void add(k) table[h(k)] = true bool find(k) return table[h(k)] unsigned hash(k) return ( /* whatever */ ) % N
/usr/share/dict/american-english
has
98,568 words in 931,467 bytes.
bzip2
compresses this down to
333,414
bytes or 2.6×106 bits.
american-english
.
american-english
with a 0.12 load factor and passes 3,409 words.
load | false | ||||
---|---|---|---|---|---|
buckets | factor | positives | |||
1,300,000 | 0.07 | 317 | 8% | ||
1,200,000 | 0.07 | 295 | 7% | ||
1,100,000 | 0.09 | 337 | 8% | ||
1,000,000 | 0.09 | 368 | 9% | ||
900,000 | 0.10 | 393 | 10% | ||
800,000 | 0.12 | 439 | 11% | ||
700,000 | 0.13 | 538 | 14% | ||
600,000 | 0.15 | 571 | 15% | ||
500,000 | 0.17 | 669 | 17% |
|
|
|
|
void add(w) table[h1(w)] = ... = table[hk(w)] = true bool find(k) return table[h1(w)] ∧ ... ∧ table[hk(w)]
load | false | ||||
---|---|---|---|---|---|
order | factor | positives | |||
1 | 0.12 | 439 | 11% | ||
2 | 0.21 | 162 | 4% | ||
3 | 0.31 | 98 | 3% | ||
4 | 0.38 | 82 | 2% | ||
5 | 0.45 | 79 | 2% |