Lecture notes for CS 503, Advanced Programming I

Advanced Programming I Lecture Notes

3 April 2007 • Hashing Basics


For example, running this code generates a random-permutation hash:

#! /bin/bash
#
# cranperm256 - write to std-out a pair of hash functions, one of which
#   produces an 8-bit hash index and the other a 32-bit hash value.  Each run
#   of cranperm265 writes a different pair of hash functions.

ranperm256 |
awk 'BEGIN {
       print "// See Fast hashing of variable-length text strings by Peter"
       print "// Pearson in CACM, June 1990 for details."
       print ""
       print "unsigned char"
       print "hash1(unsigned char v[], unsigned n, unsigned seed = 0) {"
       printf "  static unsigned char ranperm[] = {"    
       rowsep = "\n   "
       sep = ""
       rowsize = 15
       rs = 0
       }

      {for (i = 1; i <= NF; i++) {
         printf rowsep sep "%4d", $i
	 sep = ","
	 if (++rs % rowsize == 0) {
	   rowsep = ",\n   "
	   sep = ""
	   }
	 else
	   rowsep = ""
	 }
       }

     END {
       print "\n    };"
       print "  unsigned char h = ranperm[(unsigned char) seed];"
       print "  for (unsigned i = 0; i < n; i++)"
       print "    h ^= ranperm[v[i]];"
       print "  return h;"
       print "  }"
       print ""
       print "unsigned hash4(unsigned char v[], unsigned n) {"
       print "  return"
       print "    ((unsigned) hash1(v, n) << 24) + "
       print "    ((unsigned) hash1(v, n, 1) << 16) + "
       print "    ((unsigned) hash1(v, n, 2) <<  8) + "
       print "    ((unsigned) hash1(v, n, 3));"
       print "  }"
       }
    '


This page last modified on 24 January 2006.