<main procedures>= (U→) int main() { std::string emsg; regular_expression regexp; while (input_regexp(std::cin, regexp, emsg)) if (not emsg.empty()) std::cerr << "! " << emsg << "\n"; else { const language l = generate(regexp, emsg); if (not emsg.empty()) std::cerr << "! " << emsg << "\n"; else output_language(std::cout, l); } }
Definesmain
(links are to index).
This solution would work if the data types regular_expression
and
language
and the procedures input_regexp()
, generate()
, and
output_language()
were defined, but, alas, they aren't. It is usually
better to define the procedures first, then the data structures. The data
structures are simpler than the procedures, and working through the procedure
definitions provides helpful information about what the data structures should
be able to do.
<regular-expression generating procedures>= (U→) [D→] language generate(regular_expression regexp, std::string & emsg) { // Return a string list containing the regular language generated by the // given regular expression. language l; while (not regexp.empty()) { const std::string token = regexp.first_token(); regexp = regexp.rest_tokens(); language l1; if (token == "|") l1 = generate_choice(regexp, emsg); else if (token == "?") l1 = generate_option(regexp, emsg); else if (token == "*") l1 = generate_repetition(regexp, emsg); else if (token == "(") l1 = generate_group(regexp, emsg); else l1.add(token); if (not emsg.empty()) return language(); l = l.empty() ? l1 : language_utils::product(l, l1); } return l; }
Definesgenerate
(links are to index).
To generate the regular language associated with the option regular expression
? regexp
, just generate the regular language associated with regexp
and
add the empty string to represent what happens when the option isn't taken.
For example, ? a
generates {"a", ""}
.
<regular-expression generating procedures>+= (U→) [←D→] static language generate_option(regular_expression & option, std::string & emsg) { // Return a string list containing the regular language generated by the // given option, which is assumed to be stripped of the ? token. Store into // the given reference the given regular expression with the optional regular // expression stripped off. If untoward anything happens, store an // informative error message in the given reference and return an undefined // value; otherwise the string reference is untouched. language l1 = generate_head_regexp(option, "option", emsg); l1.add(""); return l1; }
Definesgenerate_option
(links are to index).
To generate the regular language associated with the choice regular expression
| regexp1 regexp2
, take the union of the regular languages associated with
regexp1
and regexp2
. For example, | a b
generates {"a", "b"}
.
<regular-expression generating procedures>+= (U→) [←D] static language generate_choice(regular_expression & choice, std::string & emsg) { // Return a string list containing the regular language generated by the // given choice, which is assumed to be stripped of the * token. Store into // the given reference the given regular expression with the optional regular // expression stripped off. If untoward anything happens, store an // informative error message in the given reference and return an undefined // value; otherwise the string reference is untouched. const language l1 = generate_head_regexp(choice, "choice", emsg); if (not emsg.empty()) return language(); const language l2 = generate_head_regexp(choice, "choice", emsg); if (not emsg.empty()) return language(); return language_utils::sum(l1, l2); }
Definesgenerate_choice
(links are to index).
<main.cc
>=
#include <iostream>
#include <string>
#include "regular-expression.h"
#include "regexp-gen.h"
#include "language.h"
// Deja vu, c++ style.
static std::string trim(const std::string &);
static void output_language(std::ostream &, const language &);
static bool
input_regexp(
std::istream & is, regular_expression & regexp, std::string & emsg) {
// Read from the given input stream a regular expression and store it in the
// given reference. If nothing untoward happened, return true and leave the
// emsg untouched; otherwise return false and store an error message in emsg.
std::string str;
while (std::getline(is, str)) {
str = trim(str);
if (not str.empty()) {
regexp = regular_expression(str);
return true;
}
}
return false;
}
<main procedures>
static void
output_language(std::ostream & os, const language & l) {
// Write to the os the strings in l, one per line.
for (unsigned i = 0; i < l.size(); i++)
os << l.get(i) << "\n";
}
static std::string
trim(const std::string & str) {
// Return a copy of str with all leading and trailing space characters
// deleted.
const char * const space_chars = " \t";
unsigned l = str.find_first_not_of(space_chars);
if (l == std::string::npos)
l = 0;
unsigned r = str.find_last_not_of(space_chars);
if (r == std::string::npos)
r = str.size();
else
r++;
return str.substr(l, r - l);
}
Definesmain.cc
(links are to index).
<regexp-gen.h
>=
#ifndef _regexp_gen_h_defined_
#define _regexp_gen_h_defined_
#include <string>
#include "language.h"
#include "regular-expression.h"
// Return a string list containing all strings from the language generated by
// the regular expression in the given string list.
extern language generate(const regular_expression, std::string &);
#endif
Definesregexp-gen.h
(links are to index).
<regexp-gen.cc
>=
#include <cassert>
#include "regexp-gen.h"
// Deja vu c++ style.
static language generate_choice(regular_expression &, std::string &);
static language generate_option(regular_expression &, std::string &);
static language generate_repetition(regular_expression &, std::string &);
static language generate_group(regular_expression &, std::string &);
static language generate_head_regexp(
regular_expression &, const std::string &, std::string &);
<regular-expression generating procedures>
static language
generate_head_regexp(
regular_expression & regexp, const std::string & what, std::string & emsg) {
regular_expression head = regexp.head(emsg);
if (regexp.empty())
return language();
regexp = regexp.tail(emsg);
return generate(head, emsg);
}
static language
generate_repetition(regular_expression & repetition, std::string & emsg) {
// Return a string list containing the regular language generated by the
// given repetition.
language l = generate_head_regexp(repetition, "repetition", emsg);
if (not emsg.empty())
return language();
l = language_utils::sum(l, language_utils::product(l, l));
l.add("");
return l;
}
static language
generate_group(regular_expression & group, std::string & emsg) {
// Return a string list containing the regular language generated by the
// given group.
language l;
while (true) {
if (group.empty()) {
emsg = "Missing close parens";
return language();
}
if (group.first_token() == ")") {
group = group.rest_tokens();
break;
}
const language l1 = generate_head_regexp(group, "group", emsg);
assert(emsg.empty());
l = language_utils::product(l, l1);
}
return l;
}
#ifdef REGEXPGENTESTING
// g++ -g -o regularexpressiontesting -DREGEXPGENTESTING -ansi -pedantic -Wall re.cc language.cc regular-expression.cc string-list.cc && ./regularexpressiontesting
int main() {
std::string emsg;
language l = generate(regular_expression("a"), emsg);
assert(emsg.empty());
assert(l.size() == 1);
assert(l.get(0) == "a");
l = generate(regular_expression("? a"), emsg);
assert(emsg.empty());
assert(l.size() == 2);
assert(l.get(0) == "a" or l.get(0) == "");
assert(l.get(1) == "a" or l.get(1) == "");
assert(l.get(0) ≠ l.get(1));
regular_expression re("a b");
l = generate_choice(re, emsg);
assert(emsg.empty());
assert(re.empty());
assert(l.size() == 2);
assert(l.get(0) == "a" or l.get(0) == "b");
assert(l.get(1) == "a" or l.get(1) == "b");
assert(l.get(0) ≠ l.get(1));
l = generate(regular_expression("| a b"), emsg);
assert(emsg.empty());
assert(l.size() == 2);
assert(l.get(0) == "a" or l.get(0) == "b");
assert(l.get(1) == "a" or l.get(1) == "b");
assert(l.get(0) ≠ l.get(1));
l = generate(regular_expression("( )"), emsg);
assert(emsg.empty());
assert(l.size() == 0);
re = regular_expression("a )");
l = generate_group(re, emsg);
assert(emsg.empty());
assert(re.empty());
assert(l.size() == 1);
assert(l.get(0) == "a");
l = generate(regular_expression("( a )"), emsg);
assert(emsg.empty());
assert(l.size() == 1);
assert(l.get(0) == "a");
}
#endif
// $Log:$
Definesregexp-gen.cc
(links are to index).
<regular-expression.h
>=
#ifndef _regular_expression_h_defined_
#define _regular_expression_h_defined_
#include "string-list.h"
class regular_expression {
public:
// Return true iff this regular expression is empty.
bool empty() const;
// Return the first (leftmost) token in this regular expression.
const std::string & first_token() const;
// Return the first (leftmost) simple regular expression in this regular
// expression.
regular_expression head(std::string &) const;
// Return an empty regular expression.
regular_expression();
// Return a regular expression parsed from the given string.
regular_expression(const std::string &);
// Return all but the first (leftmost) token in this regular expression.
regular_expression rest_tokens() const;
// Return all but the first (leftmost) simple regular expression in this
// regular expression.
regular_expression tail(std::string &) const;
private:
string_list regexp;
};
#endif
Definesregular-expression.h
(links are to index).
<regular-expression.cc
>=
#include <cassert>
#include "regular-expression.h"
static unsigned
after_head(const string_list & regexp, unsigned start, std::string & emsg) {
// Return the index one past the index of the end token in the first
// (leftmost) simple regular expression in the given regular expression.
if (starthtml_symbol(ge)regexp.piece_count)
return 0;
if ("*" == regexp.pieces[start]) {
start = after_head(regexp, start + 1, emsg);
if (emsg.empty() ∧ (start == 0))
emsg = "Missing regular expression in repetition";
return start;
}
if ("?" == regexp.pieces[start]) {
start = after_head(regexp, start + 1, emsg);
if (emsg.empty() ∧ (start == 0))
emsg = "Missing regular expression in option";
return start;
}
if ("|" == regexp.pieces[start]) {
start = after_head(regexp, start + 1, emsg);
if (emsg.empty() ∧ (start == 0)) {
emsg = "Missing regular expression in choice";
return start;
}
start = after_head(regexp, start, emsg);
if (emsg.empty() ∧ (start == 0))
emsg = "Missing regular expression in choice";
return start;
}
if ("(" == regexp.pieces[start]) {
start++;
if (starthtml_symbol(ge)regexp.piece_count) {
emsg = "Missing right paren";
return 0;
}
while (")" ≠ regexp.pieces[start]) {
start = after_head(regexp, start, emsg);
if (emsg.empty() ∧ (0 == start)) {
emsg = "Missing right paren";
return start;
}
}
return start + 1;
}
return start + 1;
}
bool regular_expression::
empty() const {
return 0 == regexp.piece_count;
}
const std::string & regular_expression::
first_token() const {
assert(not empty());
return regexp.pieces[0];
}
regular_expression regular_expression::
head(std::string & emsg) const {
assert(not empty());
regular_expression re;
re.regexp = regexp;
re.regexp.piece_count = after_head(re.regexp, 0, emsg);
return re;
}
regular_expression::
regular_expression () {
}
regular_expression::
regular_expression (const std::string & str) {
unsigned s = 0;
while (true) {
s = str.find_first_not_of(" \t\n", s);
if (std::string::npos == s)
break;
unsigned e = str.find_first_of(" \t\n", s);
if (std::string::npos == e)
e = str.length();
regexp.add(str.substr(s, e - s));
s = e;
}
}
regular_expression regular_expression::
rest_tokens() const {
assert(not empty());
regular_expression re = *this;
re.regexp.piece_count = regexp.piece_count - 1;
for (unsigned i = 1; i < regexp.piece_count; i++)
re.regexp.pieces[i - 1] = regexp.pieces[i];
return re;
}
regular_expression regular_expression::
tail(std::string & emsg) const {
assert(not empty());
regular_expression re;
unsigned i = after_head(regexp, 0, emsg);
if (emsg.empty()) {
re.regexp = regexp;
re.regexp.piece_count = 0;
while (i < regexp.piece_count)
re.regexp.pieces[re.regexp.piece_count++] = re.regexp.pieces[i++];
}
return re;
}
#ifdef REGULAREXPRESSIONTESTING
// g++ -g -o regularexpressiontesting -DREGULAREXPRESSIONTESTING -ansi -pedantic -Wall regular-expression.cc string-list.cc && ./regularexpressiontesting
int main() {
std::string emsg;
string_list sl;
sl.add("a");
sl.add("b");
assert((after_head(sl, 0, emsg) == 1) and emsg.empty());
sl.piece_count = 0;
sl.add("?");
sl.add("b");
assert((after_head(sl, 0, emsg) == 2) and emsg.empty());
sl.piece_count = 2;
sl.pieces[0] = "*";
sl.pieces[1] = "a";
assert((after_head(sl, 0, emsg) == 2) and emsg.empty());
sl.piece_count = 3;
sl.pieces[0] = "|";
sl.pieces[1] = "a";
sl.pieces[2] = "a";
assert((after_head(sl, 0, emsg) == 3) and emsg.empty());
sl.pieces[0] = "(";
sl.pieces[1] = "a";
sl.pieces[2] = ")";
assert((after_head(sl, 0, emsg) == 3) and emsg.empty());
sl.piece_count = 4;
sl.pieces[0] = "|";
sl.pieces[1] = "a";
sl.pieces[2] = "(";
sl.pieces[3] = ")";
assert((after_head(sl, 0, emsg) == 4) and emsg.empty());
sl.piece_count = 1;
sl.pieces[0]= "(";
after_head(sl, 0, emsg);
assert(not emsg.empty());
emsg = "";
regular_expression re1("a b");
regular_expression re2 = re1.head(emsg);
assert(emsg.empty());
assert(re2.first_token() == "a");
assert(re2.rest_tokens().empty());
re1 = regular_expression("a b");
re2 = re1.tail(emsg);
assert(emsg.empty());
assert(re2.first_token() == "b");
assert(re2.rest_tokens().empty());
re1 = regular_expression("( a )");
re1 = re1.rest_tokens();
assert(re1.first_token() == "a");
}
#endif
// $Log:$
Definesregular-expression.cc
(links are to index).
Languages
A language is a set of strings, which is almost a list of strings except for
duplicate strings, which should be suppressed in a set.
<the langauge public interface>= (U→) // Add the given string to this language if it's not there already. void add(const std::string &); // Return true iff this language has no strings. bool empty() const; // Return from this language the given string. const std::string & get(unsigned) const; // Return the number of strings in this language. unsigned size() const { return strings.piece_count; }
<language utility procedures>= (U→) // Return the crossproduct of the given languages. extern language product(const language &, const language &); // Return the union of the given languages. extern language sum(const language &, const language &);
<language.h
>=
#ifndef _language_h_defined_
#define _language_h_defined_
#include "string-list.h"
class language {
public:
<the langauge public interface>
private:
string_list strings;
};
namespace language_utils {
<language utility procedures>
}
#endif
Defineslanguage.h
(links are to index).
<language.cc
>=
#include <cassert>
#include "language.h"
namespace language_utils {
language
product(const language & l1, const language & l2) {
if (l1.empty())
return l2;
if (l2.empty())
return l1;
language product;
for (unsigned i = 0; i < l1.size(); i++) {
const std::string & s1 = l1.get(i);
for (unsigned j = 0; j < l2.size(); j++) {
const std::string & s2 = l2.get(j);
product.add(s1 + ((s1.empty() or s2.empty()) ? "" : " ") + s2);
}
}
return product;
}
language
sum(const language & l1, const language & l2) {
// Return the union of the given languages.
language l = l1;
for (unsigned i = 0; i < l2.size(); i++)
l.add(l2.get(i));
return l;
}
}
void language::
add(const std::string & str) {
if (not strings.contains(str))
strings.add(str);
}
bool language::
empty() const {
return size() == 0;
}
const std::string & language::
get(unsigned i) const {
assert(i < size());
return strings.pieces[i];
}
#ifdef LANGUAGETESTING
// g++ -g -o languagetesting -DLANGUAGETESTING -ansi -pedantic -Wall language.cc string-list.cc && ./languagetesting
int main() {
language l;
assert(l.empty());
assert(l.size() == 0);
l.add("hello");
assert(not l.empty());
assert(l.size() == 1);
assert(l.get(0) == "hello");
l.add("hello");
assert(not l.empty());
assert(l.size() == 1);
assert(l.get(0) == "hello");
l = language();
l.add("a");
l.add("b");
language l2;
l2.add("1");
l2.add("2");
language l3 = language_utils::sum(l, l2);
assert(l3.size() == 4);
l3.add("1");
assert(l3.size() == 4);
l3.add("2");
assert(l3.size() == 4);
l3.add("a");
assert(l3.size() == 4);
l3.add("b");
assert(l3.size() == 4);
l3 = language_utils::product(l, l2);
assert(l3.size() == 4);
l3.add("a 1");
assert(l3.size() == 4);
l3.add("a 2");
assert(l3.size() == 4);
l3.add("b 1");
assert(l3.size() == 4);
l3.add("b 2");
assert(l3.size() == 4);
}
#endif
Defineslanguage.cc
(links are to index).
<string-list.h
>=
#ifndef _string_list_h_defined_
#define _string_list_h_defined_
#include <string>
class string_list {
public:
unsigned piece_count, piece_max;
std::string * pieces;
// Add the given string to this string list; do nothing if the string's
// already in this string list.
void add(const std::string &);
// Return true iff this string list contains the given string.
bool contains(const std::string &) const;
// Assign the given string list to this string list.
string_list & operator = (const string_list &);
// Return the union of this string list with the given string list.
string_list operator + (const string_list &) const;
// Create an empty string list.
string_list();
// Get rid of this string list.
~string_list();
// Return a copy of the given string list.
string_list(const string_list &);
private:
void clear();
void copy(const string_list &);
};
#endif
Definesstring-list.h
(links are to index).
<string-list.cc
>=
#include <cassert>
#include "string-list.h"
void string_list::
add(const std::string & str) {
assert(piece_max > 0);
assert(piece_count ≤ piece_max);
if (piece_count == piece_max) {
piece_max *= 2;
std::string * n = new std::string [piece_max];
for (unsigned i = 0; i < piece_count; i++)
n[i] = pieces[i];
delete [] pieces;
pieces = n;
}
pieces[piece_count++] = str;
}
bool string_list::
contains(const std::string & str) const {
for (unsigned i = 0; i < piece_count; i++)
if (str == pieces[i])
return true;
return false;
}
string_list & string_list::
operator = (const string_list & sl) {
if (this ≠ &sl) {
piece_count = 0;
copy(sl);
}
return *this;
}
string_list string_list::
operator + (const string_list & sl) const {
if (&sl == this)
return *this;
string_list u;
u.piece_count = piece_count + sl.piece_count;
u.pieces = new std::string [u.piece_count];
unsigned i = 0;
for (unsigned j = 0; j < piece_count; j++)
u.pieces[i++] = pieces[j];
for (unsigned j = 0; j < sl.piece_count; j++) {
const std::string & s = sl.pieces[j];
if (not contains(s))
u.pieces[i++] = s;
}
u.piece_count = i;
return u;
}
string_list::
string_list() :
piece_count(0),
piece_max(8),
pieces(new std::string [piece_max]) {
}
string_list::
~string_list() {
delete [] pieces;
pieces = 0;
piece_count = piece_max = 0;
}
string_list::
string_list(const string_list & sl) :
piece_count(0),
piece_max(8),
pieces(new std::string [piece_max]) {
copy(sl);
}
void string_list::
copy(const string_list & sl) {
for (unsigned i = 0; i < sl.piece_count; i++)
add(sl.pieces[piece_count]);
}
#ifdef TESTSTRINGLIST
// g++ -g -o test-sl -DTESTSTRINGLIST -ansi -pedantic -Wall -Weffc++ string-list.cc && ./test-sl
int main() {
string_list sl1, sl2;
assert(not sl1.contains("hello"));
sl1.add("hello");
sl2.add("world");
string_list sl3 = sl1 + sl2;
assert(2 == sl3.piece_count);
assert(sl3.contains(sl1.pieces[0]));
assert(sl3.contains(sl2.pieces[0]));
sl2.pieces[0] = "hello";
sl3 = sl1 + sl2;
assert(1 == sl3.piece_count);
assert(sl3.contains(sl1.pieces[0]));
}
#endif
Definesstring-list.cc
(links are to index).
language.cc
>: D1
language.h
>: D1
main.cc
>: D1
regexp-gen.cc
>: D1
regexp-gen.h
>: D1
regular-expression.cc
>: D1
regular-expression.h
>: D1
string-list.cc
>: D1
string-list.h
>: D1