// This may look like C code, but it is really -*- C++ -*- /* ************************************************************************ * * Solving a problem of "optimal" arranging words * on a page * using Dynamic Programming * * The problem is neatly printing a paragraph of text on a printer with * fixed-width fonts. The input text is a sequence of n words containing * l1, l2, ... ln characters, respectively. Each line on the printer * can hold up to 'page_width' characters. If a printed line contains words * i through j, then the number of blanks left at the end of the line (given * that there is one blank between each pair of words) is * page-width - j + i - SUM{ l[k], k=1 through j } * We wish to minimize the sum, over all lines except the last, of the * cubes of the number of blanks at the end of each line. For example, * if there are k lines with b1, b2, ..., bk blanks at the ends, * respectively, then we wish to minimize b1^3 + b2^3 + ... + b[k-1]^3. * * The problem was offered as a homework #11 in CSCI 4450, Fall 1993, * taught by Dr. Parberry. * * $Id: word_layout.cc,v 2.0 2001/04/04 22:54:51 oleg Exp oleg $ * ************************************************************************ */ #include #include #include #define INFINITY MAXINT static inline int cube(const int x) { return x*x*x; } static inline int min(const int a, const int b) { return a < b ? a : b; } /* *------------------------------------------------------------------------ * Handle the list of words */ class WordList { int count; int curr_word_idx; // 1..count struct word { word * next; const char * str; word(const char * _str) : next(0), str(_str) {} } * words; word * curr_word; word * tail; public: WordList(void) : count(0), curr_word_idx(1), words(0), curr_word(0), tail(0) {} // ~WordList(void); // We don't dispose of the list in this exercise bool q_eof(void) const { return curr_word_idx > count; } int q_idx(void) const { return curr_word_idx; } int q_count(void) const { return count; } operator const char * (void) const // Get the current word { assert( !q_eof() && curr_word ); return curr_word->str; } WordList& next(void) { assert( !q_eof() && curr_word ); ++curr_word_idx; curr_word = curr_word->next; return *this; } // Add a new word (char string) to the WordList // We do not copy the strings themselves; we merely store their refs! WordList& operator << (const char * str); }; // Add a new word (a char string) to the WordList // We do not copy the strings themselves; we merely store their refs! WordList& WordList::operator << (const char * str) { assert( str != 0 ); word * new_word = new word(str); if( words == 0 ) // Adding to the empty list curr_word = words = tail = new_word; else { assert( tail ); tail->next = new_word; tail = new_word; } count++; return *this; } /* *------------------------------------------------------------------------ * A very primitive matrix class * Note, that the indices start with 1 */ template class Matrix { T* elems; const int no_rows; const int no_cols; public: Matrix(const int _no_rows, const int _no_cols) : no_rows(_no_rows), no_cols(_no_cols) { assert( no_rows > 0 && no_cols > 0 ); elems = new T[no_rows*no_cols]; assert( elems != 0 ); } ~Matrix(void) { assert( elems != 0 ); delete [] elems; } T& operator() (const int row_index, const int col_index) { assert(row_index > 0 && col_index > 0); assert(row_index <= no_rows && col_index <= no_cols); return elems[(row_index-1)*no_cols + col_index-1]; } void print(const char * title); }; /* *------------------------------------------------------------------------ * Node of a table * where all dynamic programming is carried on * The node keeps track of a particular arrangement of words up to the * current line */ class Node { public: int line; // Number for the current line int tail_blanks; // Number of the tail blanks int cost; // Cost function so far. Note, it // includes the contribution of tail // blanks of all the lines before // but NOT including the current line Node(void) { line = -1; cost = INFINITY; tail_blanks = -1; } void print(void) const; bool q_empty(void) const { return line <= 0; } }; static const Node Empty_node; void Node::print(void) const { if( this->q_empty() ) printf("{---Empty---}"); else printf("{l%d, t%d,%3dc}",line,tail_blanks,cost); } template void Matrix::print(const char * title) { printf("\n\n\t\t%dx%d matrix '%s'\n",no_rows,no_cols,title); for(register int i=1; i<=no_rows; i++) { for(register int j=1; j<=no_cols; j++) (*this)(i,j).print(), printf(" "); printf("\n"); } printf("\n"); } // Explicit template instantiation to please g++ //template class Matrix; /* *------------------------------------------------------------------------ * The main module of the algorithm * It takes the list of words and how many symbols fit across the page, and * returns the value of the optimal cost function * * The idea of the algorithm: * When placing the i-th word, we have generally two choices: either * to put the word on the current line (providing it fits), or to * start a new line and put the word at the beginning of it. Note, that * if the new line is started, the layout of the previous lines is * fixed, and its cost would contribute to the final cost no matter * how the rest of the words are positioned. So, if we have two * partial layouts (layout of the words 1 through i) and in both cases * we made a decision to place the (i+1)st word at the beginning of a new * line, then the layout with the smaller partial cost will be always * better, no matter how optimally the rest of the words (i through k) * are positioned. * * The complexity of the algorithm is N^2, N being the number of words. * */ static int optimal_layout(WordList& word_list, const int page_width) { const int no_words = word_list.q_count(); assert( no_words > 0 ); assert( page_width > 1 ); Matrix layout(no_words,no_words); // Handle the first word, which is always // placed at the beginning of the 1st line { assert( word_list.q_idx() == 1 ); Node& first_node = layout(1,1); first_node.line = 1; first_node.tail_blanks = page_width - strlen((const char *)word_list); assert(first_node.tail_blanks >= 0); first_node.cost = 0; } layout.print("At the outset"); while( !word_list.next().q_eof() ) { const int curr_word = word_list.q_idx(); const int curr_word_size = strlen((const char *)word_list); for(int curr_variant = 1; curr_variant < curr_word; curr_variant++) { Node& curr_node = layout(curr_variant,curr_word); Node& parent_node = layout(curr_variant,curr_word-1); // Check to see if curr_line is filled-up // already, i.e. even curr_word-1 didn't // fit into this line if( parent_node.tail_blanks < 0 ) { curr_node = Empty_node; continue; } // First try to put a node into a new line Node nl_try_node; nl_try_node.line = parent_node.line + 1; nl_try_node.tail_blanks = page_width - curr_word_size; nl_try_node.cost = parent_node.cost + cube(parent_node.tail_blanks); parent_node.print(); nl_try_node.print(); Node& nl_node = layout(curr_word,curr_word); if( nl_try_node.cost < nl_node.cost ) nl_node = nl_try_node; // Try to continue the current line of text // Remember to take into account // a blank separator from the prev. word curr_node = parent_node; curr_node.tail_blanks -= curr_word_size + 1; if( curr_node.tail_blanks < 0 ) curr_node = Empty_node; char buffer[120]; sprintf(buffer,"Iterating: laying out word '%.40s', variant #%d", (const char *)word_list,curr_variant); layout.print(buffer); } } layout.print("Finished"); int min_cost = INFINITY; register int curr_word = no_words; for(register int curr_variant=1; curr_variant<=no_words; curr_variant++) if( !layout(curr_variant,curr_word).q_empty() ) min_cost = min(min_cost,layout(curr_variant,curr_word).cost); return min_cost; } int main(void) { WordList word_list; word_list << "word1" << "word2" << "word_3" << "word4"; const int page_width = 14; printf("Optimal layout for page width %d is %d\n", page_width,optimal_layout(word_list,page_width)); return 0; }