minix/external/bsd/kyua-cli/dist/utils/text/templates.cpp

764 lines
25 KiB
C++
Raw Normal View History

// Copyright 2012 Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of Google Inc. nor the names of its contributors
// may be used to endorse or promote products derived from this software
// without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "utils/text/templates.hpp"
#include <algorithm>
#include <fstream>
#include <sstream>
#include <stack>
#include "utils/format/macros.hpp"
#include "utils/noncopyable.hpp"
#include "utils/sanity.hpp"
#include "utils/text/exceptions.hpp"
#include "utils/text/operations.ipp"
namespace text = utils::text;
namespace {
/// Definition of a template statement.
///
/// A template statement is a particular line in the input file that is
/// preceeded by a template marker. This class provides a high-level
/// representation of the contents of such statement and a mechanism to parse
/// the textual line into this high-level representation.
class statement_def {
public:
/// Types of the known statements.
enum statement_type {
/// Alternative clause of a conditional.
///
/// Takes no arguments.
type_else,
/// End of conditional marker.
///
/// Takes no arguments.
type_endif,
/// End of loop marker.
///
/// Takes no arguments.
type_endloop,
/// Beginning of a conditional.
///
/// Takes a single argument, which denotes the name of the variable or
/// vector to check for existence. This is the only expression
/// supported.
type_if,
/// Beginning of a loop over all the elements of a vector.
///
/// Takes two arguments: the name of the vector over which to iterate
/// and the name of the iterator to later index this vector.
type_loop,
};
private:
/// Internal data describing the structure of a particular statement type.
struct type_descriptor {
/// The native type of the statement.
statement_type type;
/// The expected number of arguments.
unsigned int n_arguments;
/// Constructs a new type descriptor.
///
/// \param type_ The native type of the statement.
/// \param n_arguments_ The expected number of arguments.
type_descriptor(const statement_type type_,
const unsigned int n_arguments_)
: type(type_), n_arguments(n_arguments_)
{
}
};
/// Mapping of statement type names to their definitions.
typedef std::map< std::string, type_descriptor > types_map;
/// Description of the different statement types.
///
/// This static map is initialized once and reused later for any statement
/// lookup. Unfortunately, we cannot perform this initialization in a
/// static manner without C++11.
static types_map _types;
/// Generates a new types definition map.
///
/// \return A new types definition map, to be assigned to _types.
static types_map
generate_types_map(void)
{
// If you change this, please edit the comments in the enum above.
types_map types;
types.insert(types_map::value_type(
"else", type_descriptor(type_else, 0)));
types.insert(types_map::value_type(
"endif", type_descriptor(type_endif, 0)));
types.insert(types_map::value_type(
"endloop", type_descriptor(type_endloop, 0)));
types.insert(types_map::value_type(
"if", type_descriptor(type_if, 1)));
types.insert(types_map::value_type(
"loop", type_descriptor(type_loop, 2)));
return types;
}
public:
/// The type of the statement.
statement_type type;
/// The arguments to the statement, in textual form.
const std::vector< std::string > arguments;
/// Creates a new statement.
///
/// \param type_ The type of the statement.
/// \param arguments_ The arguments to the statement.
statement_def(const statement_type& type_,
const std::vector< std::string >& arguments_) :
type(type_), arguments(arguments_)
{
#if !defined(NDEBUG)
for (types_map::const_iterator iter = _types.begin();
iter != _types.end(); ++iter) {
const type_descriptor& descriptor = (*iter).second;
if (descriptor.type == type_) {
PRE(descriptor.n_arguments == arguments_.size());
return;
}
}
UNREACHABLE;
#endif
}
/// Parses a statement.
///
/// \param line The textual representation of the statement without any
/// prefix.
///
/// \return The parsed statement.
///
/// \throw text::syntax_error If the statement is not correctly defined.
static statement_def
parse(const std::string& line)
{
if (_types.empty())
_types = generate_types_map();
const std::vector< std::string > words = text::split(line, ' ');
if (words.empty())
throw text::syntax_error("Empty statement");
const types_map::const_iterator iter = _types.find(words[0]);
if (iter == _types.end())
throw text::syntax_error(F("Unknown statement '%s'") % words[0]);
const type_descriptor& descriptor = (*iter).second;
if (words.size() - 1 != descriptor.n_arguments)
throw text::syntax_error(F("Invalid number of arguments for "
"statement '%s'") % words[0]);
std::vector< std::string > new_arguments;
new_arguments.resize(words.size() - 1);
std::copy(words.begin() + 1, words.end(), new_arguments.begin());
return statement_def(descriptor.type, new_arguments);
}
};
statement_def::types_map statement_def::_types;
/// Definition of a loop.
///
/// This simple structure is used to keep track of the parameters of a loop.
struct loop_def {
/// The name of the vector over which this loop is iterating.
std::string vector;
/// The name of the iterator defined by this loop.
std::string iterator;
/// Position in the input to which to rewind to on looping.
///
/// This position points to the line after the loop statement, not the loop
/// itself. This is one of the reasons why we have this structure, so that
/// we can maintain the data about the loop without having to re-process it.
std::istream::pos_type position;
/// Constructs a new loop definition.
///
/// \param vector_ The name of the vector (first argument).
/// \param iterator_ The name of the iterator (second argumnet).
/// \param position_ Position of the next line after the loop statement.
loop_def(const std::string& vector_, const std::string& iterator_,
const std::istream::pos_type position_) :
vector(vector_), iterator(iterator_), position(position_)
{
}
};
/// Stateful class to instantiate the templates in an input stream.
///
/// The goal of this parser is to scan the input once and not buffer anything in
/// memory. The only exception are loops: loops are reinterpreted on every
/// iteration from the same input file by rewidining the stream to the
/// appropriate position.
class templates_parser : utils::noncopyable {
/// The templates to apply.
///
/// Note that this is not const because the parser has to have write access
/// to the templates. In particular, it needs to be able to define the
/// iterators as regular variables.
text::templates_def _templates;
/// Prefix that marks a line as a statement.
const std::string _prefix;
/// Delimiter to surround an expression instantiation.
const std::string _delimiter;
/// Whether to skip incoming lines or not.
///
/// The top of the stack is true whenever we encounter a conditional that
/// evaluates to false or a loop that does not have any iterations left.
/// Under these circumstances, we need to continue scanning the input stream
/// until we find the matching closing endif or endloop construct.
///
/// This is a stack rather than a plain boolean to allow us deal with
/// if-else clauses.
std::stack< bool > _skip;
/// Current count of nested conditionals.
unsigned int _if_level;
/// Level of the top-most conditional that evaluated to false.
unsigned int _exit_if_level;
/// Current count of nested loops.
unsigned int _loop_level;
/// Level of the top-most loop that does not have any iterations left.
unsigned int _exit_loop_level;
/// Information about all the nested loops up to the current point.
std::stack< loop_def > _loops;
/// Checks if a line is a statement or not.
///
/// \param line The line to validate.
///
/// \return True if the line looks like a statement, which is determined by
/// checking if the line starts by the predefined prefix.
bool
is_statement(const std::string& line)
{
return ((line.length() >= _prefix.length() &&
line.substr(0, _prefix.length()) == _prefix) &&
(line.length() < _delimiter.length() ||
line.substr(0, _delimiter.length()) != _delimiter));
}
/// Parses a given statement line into a statement definition.
///
/// \param line The line to validate; it must be a valid statement.
///
/// \return The parsed statement.
///
/// \throw text::syntax_error If the input is not a valid statement.
statement_def
parse_statement(const std::string& line)
{
PRE(is_statement(line));
return statement_def::parse(line.substr(_prefix.length()));
}
/// Processes a line from the input when not in skip mode.
///
/// \param line The line to be processed.
/// \param input The input stream from which the line was read. The current
/// position in the stream must be after the line being processed.
/// \param output The output stream into which to write the results.
///
/// \throw text::syntax_error If the input is not valid.
void
handle_normal(const std::string& line, std::istream& input,
std::ostream& output)
{
if (!is_statement(line)) {
// Fast path. Mostly to avoid an indentation level for the big
// chunk of code below.
output << line << '\n';
return;
}
const statement_def statement = parse_statement(line);
switch (statement.type) {
case statement_def::type_else:
_skip.top() = !_skip.top();
break;
case statement_def::type_endif:
_if_level--;
break;
case statement_def::type_endloop: {
PRE(_loops.size() == _loop_level);
loop_def& loop = _loops.top();
const std::size_t next_index = 1 + text::to_type< std::size_t >(
_templates.get_variable(loop.iterator));
if (next_index < _templates.get_vector(loop.vector).size()) {
_templates.add_variable(loop.iterator, F("%s") % next_index);
input.seekg(loop.position);
} else {
_loop_level--;
_loops.pop();
_templates.remove_variable(loop.iterator);
}
} break;
case statement_def::type_if: {
_if_level++;
const std::string value = _templates.evaluate(
statement.arguments[0]);
if (value.empty() || value == "0" || value == "false") {
_exit_if_level = _if_level;
_skip.push(true);
} else {
_skip.push(false);
}
} break;
case statement_def::type_loop: {
_loop_level++;
const loop_def loop(statement.arguments[0], statement.arguments[1],
input.tellg());
if (_templates.get_vector(loop.vector).empty()) {
_exit_loop_level = _loop_level;
_skip.push(true);
} else {
_templates.add_variable(loop.iterator, "0");
_loops.push(loop);
_skip.push(false);
}
} break;
}
}
/// Processes a line from the input when in skip mode.
///
/// \param line The line to be processed.
///
/// \throw text::syntax_error If the input is not valid.
void
handle_skip(const std::string& line)
{
PRE(_skip.top());
if (!is_statement(line))
return;
const statement_def statement = parse_statement(line);
switch (statement.type) {
case statement_def::type_else:
if (_exit_if_level == _if_level)
_skip.top() = !_skip.top();
break;
case statement_def::type_endif:
INV(_if_level >= _exit_if_level);
if (_if_level == _exit_if_level)
_skip.top() = false;
_if_level--;
_skip.pop();
break;
case statement_def::type_endloop:
INV(_loop_level >= _exit_loop_level);
if (_loop_level == _exit_loop_level)
_skip.top() = false;
_loop_level--;
_skip.pop();
break;
case statement_def::type_if:
_if_level++;
_skip.push(true);
break;
case statement_def::type_loop:
_loop_level++;
_skip.push(true);
break;
default:
break;
}
}
/// Evaluates expressions on a given input line.
///
/// An expression is surrounded by _delimiter on both sides. We scan the
/// string from left to right finding any expressions that may appear, yank
/// them out and call templates_def::evaluate() to get their value.
///
/// Lonely or unbalanced appearances of _delimiter on the input line are
/// not considered an error, given that the user may actually want to supply
/// that character sequence without being interpreted as a template.
///
/// \param in_line The input line from which to evaluate expressions.
///
/// \return The evaluated line.
///
/// \throw text::syntax_error If the expressions in the line are malformed.
std::string
evaluate(const std::string& in_line)
{
std::string out_line;
std::string::size_type last_pos = 0;
while (last_pos != std::string::npos) {
const std::string::size_type open_pos = in_line.find(
_delimiter, last_pos);
if (open_pos == std::string::npos) {
out_line += in_line.substr(last_pos);
last_pos = std::string::npos;
} else {
const std::string::size_type close_pos = in_line.find(
_delimiter, open_pos + _delimiter.length());
if (close_pos == std::string::npos) {
out_line += in_line.substr(last_pos);
last_pos = std::string::npos;
} else {
out_line += in_line.substr(last_pos, open_pos - last_pos);
out_line += _templates.evaluate(in_line.substr(
open_pos + _delimiter.length(),
close_pos - open_pos - _delimiter.length()));
last_pos = close_pos + _delimiter.length();
}
}
}
return out_line;
}
public:
/// Constructs a new template parser.
///
/// \param templates_ The templates to apply to the processed file.
/// \param prefix_ The prefix that identifies lines as statements.
/// \param delimiter_ Delimiter to surround a variable instantiation.
templates_parser(const text::templates_def& templates_,
const std::string& prefix_,
const std::string& delimiter_) :
_templates(templates_),
_prefix(prefix_),
_delimiter(delimiter_),
_if_level(0),
_exit_if_level(0),
_loop_level(0),
_exit_loop_level(0)
{
}
/// Applies the templates to a given input.
///
/// \param input The stream to which to apply the templates.
/// \param output The stream into which to write the results.
///
/// \throw text::syntax_error If the input is not valid. Note that the
/// is not guaranteed to be unmodified on exit if an error is
/// encountered.
void
instantiate(std::istream& input, std::ostream& output)
{
std::string line;
while (std::getline(input, line).good()) {
if (!_skip.empty() && _skip.top())
handle_skip(line);
else
handle_normal(evaluate(line), input, output);
}
}
};
} // anonymous namespace
/// Constructs an empty templates definition.
text::templates_def::templates_def(void)
{
}
/// Sets a string variable in the templates.
///
/// If the variable already exists, its value is replaced. This behavior is
/// required to implement iterators, but client code should really not be
/// redefining variables.
///
/// \pre The variable must not already exist as a vector.
///
/// \param name The name of the variable to set.
/// \param value The value to set the given variable to.
void
text::templates_def::add_variable(const std::string& name,
const std::string& value)
{
PRE(_vectors.find(name) == _vectors.end());
_variables[name] = value;
}
/// Unsets a string variable from the templates.
///
/// Client code has no reason to use this. This is only required to implement
/// proper scoping of loop iterators.
///
/// \pre The variable must exist.
///
/// \param name The name of the variable to remove from the templates.
void
text::templates_def::remove_variable(const std::string& name)
{
PRE(_variables.find(name) != _variables.end());
_variables.erase(_variables.find(name));
}
/// Creates a new vector in the templates.
///
/// If the vector already exists, it is cleared. Client code should really not
/// be redefining variables.
///
/// \pre The vector must not already exist as a variable.
///
/// \param name The name of the vector to set.
void
text::templates_def::add_vector(const std::string& name)
{
PRE(_variables.find(name) == _variables.end());
_vectors[name] = strings_vector();
}
/// Adds a value to an existing vector in the templates.
///
/// \pre name The vector must exist.
///
/// \param name The name of the vector to append the value to.
/// \param value The textual value to append to the vector.
void
text::templates_def::add_to_vector(const std::string& name,
const std::string& value)
{
PRE(_variables.find(name) == _variables.end());
PRE(_vectors.find(name) != _vectors.end());
_vectors[name].push_back(value);
}
/// Checks whether a given identifier exists as a variable or a vector.
///
/// This is used to implement the evaluation of conditions in if clauses.
///
/// \param name The name of the variable or vector.
///
/// \return True if the given name exists as a variable or a vector; false
/// otherwise.
bool
text::templates_def::exists(const std::string& name) const
{
return (_variables.find(name) != _variables.end() ||
_vectors.find(name) != _vectors.end());
}
/// Gets the value of a variable.
///
/// \param name The name of the variable.
///
/// \return The value of the requested variable.
///
/// \throw text::syntax_error If the variable does not exist.
const std::string&
text::templates_def::get_variable(const std::string& name) const
{
const variables_map::const_iterator iter = _variables.find(name);
if (iter == _variables.end())
throw text::syntax_error(F("Unknown variable '%s'") % name);
return (*iter).second;
}
/// Gets a vector.
///
/// \param name The name of the vector.
///
/// \return A reference to the requested vector.
///
/// \throw text::syntax_error If the vector does not exist.
const text::templates_def::strings_vector&
text::templates_def::get_vector(const std::string& name) const
{
const vectors_map::const_iterator iter = _vectors.find(name);
if (iter == _vectors.end())
throw text::syntax_error(F("Unknown vector '%s'") % name);
return (*iter).second;
}
/// Indexes a vector and gets the value.
///
/// \param name The name of the vector to index.
/// \param index_name The name of a variable representing the index to use.
/// This must be convertible to a natural.
///
/// \return The value of the vector at the given index.
///
/// \throw text::syntax_error If the vector does not existor if the index is out
/// of range.
const std::string&
text::templates_def::get_vector(const std::string& name,
const std::string& index_name) const
{
const strings_vector& vector = get_vector(name);
const std::string& index_str = get_variable(index_name);
std::size_t index;
try {
index = text::to_type< std::size_t >(index_str);
} catch (const text::syntax_error& e) {
throw text::syntax_error(F("Index '%s' not an integer, value '%s'") %
index_name % index_str);
}
if (index >= vector.size())
throw text::syntax_error(F("Index '%s' out of range at position '%s'") %
index_name % index);
return vector[index];
}
/// Evaluates a expression using these templates.
///
/// An expression is a query on the current templates to fetch a particular
/// value. The value is always returned as a string, as this is how templates
/// are internally stored.
///
/// \param expression The expression to evaluate. This should not include any
/// of the delimiters used in the user input, as otherwise the expression
/// will not be evaluated properly.
///
/// \return The result of the expression evaluation as a string.
///
/// \throw text::syntax_error If there is any problem while evaluating the
/// expression.
std::string
text::templates_def::evaluate(const std::string& expression) const
{
const std::string::size_type paren_open = expression.find('(');
if (paren_open == std::string::npos) {
return get_variable(expression);
} else {
const std::string::size_type paren_close = expression.find(
')', paren_open);
if (paren_close == std::string::npos)
throw text::syntax_error(F("Expected ')' in expression '%s')") %
expression);
if (paren_close != expression.length() - 1)
throw text::syntax_error(F("Unexpected text found after ')' in "
"expression '%s'") % expression);
const std::string arg0 = expression.substr(0, paren_open);
const std::string arg1 = expression.substr(
paren_open + 1, paren_close - paren_open - 1);
if (arg0 == "defined") {
return exists(arg1) ? "true" : "false";
} else if (arg0 == "length") {
return F("%s") % get_vector(arg1).size();
} else {
return get_vector(arg0, arg1);
}
}
}
/// Applies a set of templates to an input stream.
///
/// \param templates The templates to use.
/// \param input The input to process.
/// \param output The stream to which to write the processed text.
///
/// \throw text::syntax_error If there is any problem processing the input.
void
text::instantiate(const templates_def& templates,
std::istream& input, std::ostream& output)
{
templates_parser parser(templates, "%", "%%");
parser.instantiate(input, output);
}
/// Applies a set of templates to an input file and writes an output file.
///
/// \param templates The templates to use.
/// \param input_file The path to the input to process.
/// \param output_file The path to the file into which to write the output.
///
/// \throw text::error If the input or output files cannot be opened.
/// \throw text::syntax_error If there is any problem processing the input.
void
text::instantiate(const templates_def& templates,
const fs::path& input_file, const fs::path& output_file)
{
std::ifstream input(input_file.c_str());
if (!input)
throw text::error(F("Failed to open %s for read") % input_file);
std::ofstream output(output_file.c_str());
if (!output)
throw text::error(F("Failed to open %s for write") % output_file);
instantiate(templates, input, output);
}