To implement exhaustive search, we need a way of generating all combinations of compressed strings. There are a couple of ways to do this, but the easiest way is to number the n input strings from 1 to n. Every arrangment of the numbers 1 to n then represents a compressed string with component strings appearing in the order given by the number arrangement.
For example, the input kissing
, singer
, and gerbil
can be
numbered
1: kissing 2: singer 3: gerbil
and the number arrangement 3 1 2 represents the gerbilkissingsinger
, which
compresses to the string gerbilkissinger
.
Now all we need is a way to produce all possible combinations of the numbers 1 through n, which is a well-known task called permutation. There is a simple recursive algorithm for generating the permutations of a list of n distinct items:
<ouroboros-compression routines>= static void permulate(uvec perm, unsigned mid, const svec & strs, result & r) { // Generate a permuation. if (mid == perm.size()) f(perm, strs, r); else for (unsigned m = mid; m < perm.size(); m++) { std::swap(perm[mid], perm[m]); permulate(perm, mid + 1, strs, r); std::swap(perm[m], perm[mid]); } }
Definespermulate
(links are to index).
<ouroboros-compression routines>+= static void f(const uvec & permutation, const svec & strs, result & r) { // Generate a compressed string using the given string list and permutation. // If the resulting string is shorter than the given current best, store the // resulting string into the given current best. if (strs.empty()) return; std::string str = strs[permutation[0]]; str_iter b = str.begin(); Key key; key.push_back(0); key.push_back(str.size()); // invariants: // str is the maximally compressed verstion of the string sequence // strs[0..i). // keys is the uncompression string for str. for (unsigned i = 1; i < strs.size(); i++) { const std::string & s = strs[permutation[i]]; const str_iter e = str.end(), m = maximal_suffix_match(b, e, s); key.push_back(m - b + 1); key.push_back(s.size() - (e - m)); str += s.substr(e - m); b = str.end() - s.size(); } if (r.str.empty() || (str.size() < r.str.size())) { r.str = str; r.key = key; } }
Definesf
(links are to index).Used below; previous and next definitions.
<ouroboros-compression routines>+= result ouroboros_compress(const svec & strs) { // Maximally ouroboros compress the given string set; return the resulting // string. result r; const unsigned str_cnt = strs.size(); uvec perm(str_cnt); for (unsigned i = 0; i < str_cnt; i++) perm[i] = i; permulate(perm, 0, strs, r); return r; }
Definesouroboros_compress
(links are to index).Used below; previous definition.
<oc.cc
>=
#include <iostream>
#include <iterator>
#include "oc.h"
#include "utils.h"
#include "debugp.h"
typedef std::string::size_type str_indx;
typedef std::vector<unsigned> uvec;
// Deja vu c++ style.
static void permulate(uvec, unsigned, const svec &, result &);
<ouroboros-compression routines>
#ifdef OCTESTING
#include <algorithm>
// g++ -o oc-testing -DOCTESTING -ansi -pedantic -gstabs -I$HOME/lib/c oc.cc utils.cc && ./oc-testing
int main() {
svec sv;
sv.push_back("abc");
sv.push_back("abcx");
sv.push_back("xabc");
result r = ouroboros_compress(sv);
while (std::next_permutation(sv.begin(), sv.end())) {
result r1 = ouroboros_compress(sv);
assert(r1.str == r.str);
assert(r1.key == r.key);
}
}
#endif
Definesoc.cc
(links are to index).This code is written to a file (or else not used).
<main>= #include <string> #include <vector> #include <iostream> #include "oc.h" // Deja vu c++ style. static void output_uncompression_key(std::ostream &, const Key &); static void output_compressed_strings(std::ostream &, const std::string &); int main() { svec strs; std::string str; while (getline(std::cin, str)) if (not str.empty()) strs.push_back(str); const result r = ouroboros_compress(strs); output_compressed_strings(std::cout, r.str); std::cout << "\n"; output_uncompression_key(std::cout, r.key); } static void output_compressed_strings(std::ostream & os, const std::string & str) { // Write the given string to the given output stream. unsigned e = str.size(), s = 0; while (e > 0) { const unsigned l = std::min(70u, e); os << str.substr(s, l) << "\n"; e -= l; s += l; } } static void output_uncompression_key(std::ostream & os, const Key & key) { // Write the given uncompression key to the given output stream. for (Key::size_type i = 0; i < key.size(); i += 2) os << key[i] << " " << key[i + 1] << "\n"; } static std::string trim(const std::string & str) { // Return the given string shorn of space characters front and back. std::string::size_type b = 0, e = str.size(); while ((b < e) and isspace(str.at(b))) b++; while ((b < e) and isspace(str.at(e - 1))) e--; return str.substr(b, e - b); }
Definesmain.cc
(links are to index).This code is written to a file (or else not used).
<utils.cc
>=
#include "utils.h"
// Deja vu c++ style.
static bool suffix_match(
const str_iter &, const str_iter &, const std::string &);
str_iter
maximal_suffix_match(
const str_iter & b, const str_iter & e, const std::string & str) {
// Return m such that [m, e) is the largest suffix of [b, e) that is also a
// prefix of str.
assert(b < e);
str_iter m = e - std::min(static_cast<unsigned>(str.size()),
static_cast<unsigned>(e - b));
// invariant: every suffix starting in [b, m) is not a prefix of str.
while ((m != e) and not suffix_match(m, e, str))
++m;
return m;
}
static bool
suffix_match(
const str_iter & b, const str_iter & e, const std::string & str) {
// Return true iff [b, e) is a prefix of str.
assert(b <= e);
assert(static_cast<unsigned>(e - b) <= str.size());
// invariant: (b + i, e) == str((e - b) - i, (e - b))
for (unsigned i = (e - b); i > 0; --i)
if (*(b + i - 1) != str[i - 1])
return false;
return true;
}
#ifdef UTILTESTING
// g++ -o utiltesting -DUTILTESTING -g -ansi -pedantic oc.cc && ./utiltesting
int main() {
std::string s = "hello";
assert(suffix_match(s.begin(), s.end(), "hello"));
assert(not suffix_match(s.begin() + 1, s.end(), "hello"));
assert(suffix_match(s.begin() + 1, s.end(), "ello"));
assert(s.begin() == maximal_suffix_match(s.begin(), s.end(), s));
assert(s.end() == maximal_suffix_match(s.begin() + 1, s.end(), s));
assert(s.begin() + 1 == maximal_suffix_match(s.begin() + 1, s.end(), "ello"));
assert(s.end() == maximal_suffix_match(s.begin() + 1, s.end(), "ell"));
}
#endif
Definesutils.cc
(links are to index).This code is written to a file (or else not used).
This page last modified on 25 March 2004.