To implement exhaustive search, we need a way of generating all combinations of compressed strings. There are a couple of ways to do this, but the easiest way is to number the n input strings from 1 to n. Every arrangment of the numbers 1 to n then represents a compressed string with component strings appearing in the order given by the number arrangement.
For example, the input kissing, singer, and gerbil can be
numbered
1: kissing 2: singer 3: gerbil
and the number arrangement 3 1 2 represents the gerbilkissingsinger, which
compresses to the string gerbilkissinger.
Now all we need is a way to produce all possible combinations of the numbers 1 through n, which is a well-known task called permutation. There is a simple recursive algorithm for generating the permutations of a list of n distinct items:
<ouroboros-compression routines>=
static void
permulate(uvec perm, unsigned mid, const svec & strs, result & r) {
// Generate a permuation.
if (mid == perm.size())
f(perm, strs, r);
else
for (unsigned m = mid; m < perm.size(); m++) {
std::swap(perm[mid], perm[m]);
permulate(perm, mid + 1, strs, r);
std::swap(perm[m], perm[mid]);
}
}
Definespermulate(links are to index).
<ouroboros-compression routines>+=
static void
f(const uvec & permutation, const svec & strs, result & r) {
// Generate a compressed string using the given string list and permutation.
// If the resulting string is shorter than the given current best, store the
// resulting string into the given current best.
if (strs.empty())
return;
std::string str = strs[permutation[0]];
str_iter b = str.begin();
Key key;
key.push_back(0);
key.push_back(str.size());
// invariants:
// str is the maximally compressed verstion of the string sequence
// strs[0..i).
// keys is the uncompression string for str.
for (unsigned i = 1; i < strs.size(); i++) {
const std::string & s = strs[permutation[i]];
const str_iter
e = str.end(),
m = maximal_suffix_match(b, e, s);
key.push_back(m - b + 1);
key.push_back(s.size() - (e - m));
str += s.substr(e - m);
b = str.end() - s.size();
}
if (r.str.empty() || (str.size() < r.str.size())) {
r.str = str;
r.key = key;
}
}
Definesf(links are to index).Used below; previous and next definitions.
<ouroboros-compression routines>+=
result
ouroboros_compress(const svec & strs) {
// Maximally ouroboros compress the given string set; return the resulting
// string.
result r;
const unsigned str_cnt = strs.size();
uvec perm(str_cnt);
for (unsigned i = 0; i < str_cnt; i++)
perm[i] = i;
permulate(perm, 0, strs, r);
return r;
}
Definesouroboros_compress(links are to index).Used below; previous definition.
<oc.cc>=
#include <iostream>
#include <iterator>
#include "oc.h"
#include "utils.h"
#include "debugp.h"
typedef std::string::size_type str_indx;
typedef std::vector<unsigned> uvec;
// Deja vu c++ style.
static void permulate(uvec, unsigned, const svec &, result &);
<ouroboros-compression routines>
#ifdef OCTESTING
#include <algorithm>
// g++ -o oc-testing -DOCTESTING -ansi -pedantic -gstabs -I$HOME/lib/c oc.cc utils.cc && ./oc-testing
int main() {
svec sv;
sv.push_back("abc");
sv.push_back("abcx");
sv.push_back("xabc");
result r = ouroboros_compress(sv);
while (std::next_permutation(sv.begin(), sv.end())) {
result r1 = ouroboros_compress(sv);
assert(r1.str == r.str);
assert(r1.key == r.key);
}
}
#endif
Definesoc.cc(links are to index).This code is written to a file (or else not used).
<main>=
#include <string>
#include <vector>
#include <iostream>
#include "oc.h"
// Deja vu c++ style.
static void output_uncompression_key(std::ostream &, const Key &);
static void output_compressed_strings(std::ostream &, const std::string &);
int
main() {
svec strs;
std::string str;
while (getline(std::cin, str))
if (not str.empty())
strs.push_back(str);
const result r = ouroboros_compress(strs);
output_compressed_strings(std::cout, r.str);
std::cout << "\n";
output_uncompression_key(std::cout, r.key);
}
static void
output_compressed_strings(std::ostream & os, const std::string & str) {
// Write the given string to the given output stream.
unsigned
e = str.size(),
s = 0;
while (e > 0) {
const unsigned l = std::min(70u, e);
os << str.substr(s, l) << "\n";
e -= l;
s += l;
}
}
static void
output_uncompression_key(std::ostream & os, const Key & key) {
// Write the given uncompression key to the given output stream.
for (Key::size_type i = 0; i < key.size(); i += 2)
os << key[i] << " " << key[i + 1] << "\n";
}
static std::string
trim(const std::string & str) {
// Return the given string shorn of space characters front and back.
std::string::size_type
b = 0,
e = str.size();
while ((b < e) and isspace(str.at(b)))
b++;
while ((b < e) and isspace(str.at(e - 1)))
e--;
return str.substr(b, e - b);
}
Definesmain.cc(links are to index).This code is written to a file (or else not used).
<utils.cc>=
#include "utils.h"
// Deja vu c++ style.
static bool suffix_match(
const str_iter &, const str_iter &, const std::string &);
str_iter
maximal_suffix_match(
const str_iter & b, const str_iter & e, const std::string & str) {
// Return m such that [m, e) is the largest suffix of [b, e) that is also a
// prefix of str.
assert(b < e);
str_iter m = e - std::min(static_cast<unsigned>(str.size()),
static_cast<unsigned>(e - b));
// invariant: every suffix starting in [b, m) is not a prefix of str.
while ((m != e) and not suffix_match(m, e, str))
++m;
return m;
}
static bool
suffix_match(
const str_iter & b, const str_iter & e, const std::string & str) {
// Return true iff [b, e) is a prefix of str.
assert(b <= e);
assert(static_cast<unsigned>(e - b) <= str.size());
// invariant: (b + i, e) == str((e - b) - i, (e - b))
for (unsigned i = (e - b); i > 0; --i)
if (*(b + i - 1) != str[i - 1])
return false;
return true;
}
#ifdef UTILTESTING
// g++ -o utiltesting -DUTILTESTING -g -ansi -pedantic oc.cc && ./utiltesting
int main() {
std::string s = "hello";
assert(suffix_match(s.begin(), s.end(), "hello"));
assert(not suffix_match(s.begin() + 1, s.end(), "hello"));
assert(suffix_match(s.begin() + 1, s.end(), "ello"));
assert(s.begin() == maximal_suffix_match(s.begin(), s.end(), s));
assert(s.end() == maximal_suffix_match(s.begin() + 1, s.end(), s));
assert(s.begin() + 1 == maximal_suffix_match(s.begin() + 1, s.end(), "ello"));
assert(s.end() == maximal_suffix_match(s.begin() + 1, s.end(), "ell"));
}
#endif
Definesutils.cc(links are to index).This code is written to a file (or else not used).
This page last modified on 25 March 2004.