base: Add wrapped protobuf input stream

This patch adds support for inputting protobuf messages through a
ProtoInputStream which hides the internal streams used by the
library. The stream is created based on the name of an input file and
optionally includes decompression using gzip.

The input stream starts by reading a magic number from the file and
verifying that it matches the expected value. Once opened, messages
can be read incrementally from the stream; each read returns true as
long as a message was read, and false once an error occurs or the end
of the file is reached.
This commit is contained in:
Andreas Hansson 2013-01-07 13:05:37 -05:00
parent f456c7983d
commit 4afa6c4c3e
2 changed files with 202 additions and 6 deletions

View file

@ -62,10 +62,8 @@ ProtoOutputStream::ProtoOutputStream(const string& filename) :
codedStream = new io::CodedOutputStream(zeroCopyStream); codedStream = new io::CodedOutputStream(zeroCopyStream);
} }
// Use the ASCII characters gem5 as our magic number and write it // Write the magic number to the file
// to the file codedStream->WriteLittleEndian32(magicNumber);
const uint32_t magic_number = 0x356d6567;
codedStream->WriteLittleEndian32(magic_number);
// Note that each type of stream (packet, instruction etc) should // Note that each type of stream (packet, instruction etc) should
// add its own header and perform the appropriate checks // add its own header and perform the appropriate checks
@ -91,3 +89,102 @@ ProtoOutputStream::write(const Message& msg)
if (!msg.SerializeToCodedStream(codedStream)) if (!msg.SerializeToCodedStream(codedStream))
panic("Unable to write message to coded stream\n"); panic("Unable to write message to coded stream\n");
} }
/**
 * Open the named file for binary reading, decide from its first two
 * bytes whether it is gzip compressed, and build the stack of
 * protobuf streams on top of it.
 *
 * @param filename Path to the file to read from
 */
ProtoInputStream::ProtoInputStream(const string& filename) :
    fileStream(filename, ios::in | ios::binary), fileName(filename),
    useGzip(false),
    zeroCopyStream(NULL), gzipStream(NULL), codedStream(NULL)
{
    if (!fileStream.good())
        panic("Could not open %s for reading\n", filename);

    // Sniff the two leading bytes: 0x1f 0x8b marks a gzip stream. If
    // the two bytes cannot be read the input is treated as
    // uncompressed, and the buffer contents are never inspected.
    char header[2];
    fileStream.read(header, sizeof(header));
    const bool header_read = fileStream.good();
    useGzip = header_read &&
        static_cast<unsigned char>(header[0]) == 0x1f &&
        static_cast<unsigned char>(header[1]) == 0x8b;

    // The sniffing moved the read position (and may have set error
    // flags), so clear the state and rewind before the protobuf
    // streams start consuming the file from the beginning
    fileStream.clear();
    fileStream.seekg(0, ifstream::beg);

    createStreams();
}
/**
 * Build the chain of protobuf streams on top of the already opened
 * file stream and validate the magic number at the start of the
 * (possibly decompressed) data.
 */
void
ProtoInputStream::createStreams()
{
    // Nothing may be left over from a previous set of streams
    assert(zeroCopyStream == NULL && gzipStream == NULL &&
           codedStream == NULL);

    // The file stream is always wrapped in a zero copy stream. The
    // coded stream that messages are read from sits either directly
    // on top of that, or on top of an intermediate gzip stream when
    // the useGzip flag is set.
    zeroCopyStream = new io::IstreamInputStream(&fileStream);

    if (!useGzip) {
        codedStream = new io::CodedInputStream(zeroCopyStream);
    } else {
        gzipStream = new io::GzipInputStream(zeroCopyStream);
        codedStream = new io::CodedInputStream(gzipStream);
    }

    // The first four bytes must hold the magic number; the value is
    // only compared if it could actually be read
    uint32_t magic_check;
    const bool magic_read = codedStream->ReadLittleEndian32(&magic_check);
    if (!magic_read || magic_check != magicNumber)
        panic("Input file %s is not a valid gem5 proto format.\n",
              fileName);
}
/**
 * Release the protobuf streams wrapping the input file and reset the
 * pointers to NULL so that createStreams() can be called again.
 */
void
ProtoInputStream::destroyStreams()
{
    // Tear down from the outermost wrapper inwards, as each stream
    // was constructed with a pointer to the one it wraps
    delete codedStream;
    codedStream = NULL;

    // The gzip stream only exists for compressed input; deleting a
    // NULL pointer is a no-op, so no explicit check is needed
    delete gzipStream;
    gzipStream = NULL;

    delete zeroCopyStream;
    zeroCopyStream = NULL;
}
/**
* Destroy the protobuf stream wrappers and then close the underlying
* file stream.
*/
ProtoInputStream::~ProtoInputStream()
{
// The wrappers were created on top of fileStream, so release them
// before the file itself is closed
destroyStreams();
fileStream.close();
}
/**
* Restart reading from the beginning of the input file. The protobuf
* stream wrappers are destroyed and created again, which also repeats
* the magic number check done in createStreams().
*/
void
ProtoInputStream::reset()
{
// Drop the existing wrappers before the file position is changed
// underneath them
destroyStreams();
// seek to the start of the input file and clear any flags
fileStream.clear();
fileStream.seekg(0, ifstream::beg);
// Build a fresh set of wrappers on top of the rewound file
createStreams();
}
/**
 * Read the next length-prefixed message from the stream.
 *
 * Each record consists of a varint holding the size of the message,
 * followed by the serialized message itself.
 *
 * @param msg Message to fill in with the data read from the stream
 * @return True if a message was read, false if the size prefix could
 *         not be read (end of file or a read error). A record whose
 *         body cannot be parsed, or is not consumed completely, is
 *         treated as fatal and causes a panic.
 */
bool
ProtoInputStream::read(Message& msg)
{
    // NOTE(review): protobuf's CodedInputStream applies a total-bytes
    // limit over its lifetime and this coded stream lives as long as
    // the file is open - confirm large inputs are not cut short.

    // Without a size prefix there is no further message to read
    uint32_t size;
    if (!codedStream->ReadVarint32(&size))
        return false;

    // Use the size as a limit when parsing, so that the parser stops
    // at the end of this message rather than running into the next
    io::CodedInputStream::Limit limit = codedStream->PushLimit(size);

    // Besides the parse succeeding, the parser must have stopped
    // because it reached the limit. If it stopped early the remaining
    // bytes of this record would be misread as the start of the next
    // one, silently corrupting every subsequent read.
    if (!msg.ParseFromCodedStream(codedStream) ||
        !codedStream->ConsumedEntireMessage())
        panic("Unable to read message from coded stream %s\n",
              fileName);

    // All went well, the message is parsed, so pop the limit again
    codedStream->PopLimit(limit);
    return true;
}

View file

@ -40,7 +40,7 @@
/** /**
* @file * @file
* Declaration of a wrapper for protobuf output streams. * Declaration of a wrapper for protobuf output streams and input streams.
*/ */
#ifndef __PROTO_PROTOIO_HH__ #ifndef __PROTO_PROTOIO_HH__
@ -53,6 +53,34 @@
#include <fstream> #include <fstream>
/**
 * A ProtoStream provides the functionality shared by the input and
 * output streams. At the moment this is limited to the magic number
 * that marks the start of a gem5 protobuf file.
 */
class ProtoStream
{

  protected:

    /// Use the ASCII characters gem5 as our magic number. Stored as a
    /// little-endian 32-bit value, 0x356d6567 ends up in the file as
    /// the bytes 0x67 0x65 0x6d 0x35, i.e. 'g' 'e' 'm' '5'.
    static const uint32_t magicNumber = 0x356d6567;

    /**
     * Create a ProtoStream. Only derived classes can do so.
     */
    ProtoStream() {}

    /**
     * Destroy a ProtoStream. The destructor is non-virtual, so it is
     * kept protected to make deleting a derived stream through a
     * ProtoStream pointer a compile error rather than undefined
     * behaviour.
     */
    ~ProtoStream() {}

  private:

    /**
     * Hide the copy constructor and assignment operator.
     * @{
     */
    ProtoStream(const ProtoStream&);
    ProtoStream& operator=(const ProtoStream&);
    /** @} */
};
/** /**
* A ProtoOutputStream wraps a coded stream, potentially with * A ProtoOutputStream wraps a coded stream, potentially with
* compression, based on looking at the file name. Writing to the * compression, based on looking at the file name. Writing to the
@ -61,7 +89,7 @@
* is made possible by encoding the length of each message in the * is made possible by encoding the length of each message in the
* stream. * stream.
*/ */
class ProtoOutputStream class ProtoOutputStream : public ProtoStream
{ {
public: public:
@ -104,4 +132,75 @@ class ProtoOutputStream
}; };
/**
* A ProtoInputStream wraps a coded stream, potentially with
* decompression, based on looking at the first bytes of the file.
* Reading from the stream is done on a per-message basis to avoid
* having to deal with huge data structures. The latter assumes the
* length of each message is encoded in the stream when it is written.
*/
class ProtoInputStream : public ProtoStream
{
public:
/**
* Create an input stream for a given file name. If the file starts
* with the gzip magic bytes (0x1f 0x8b) it is decompressed while
* reading, irrespective of the file name. The magic number at the
* start of the data is checked as part of the construction.
*
* @param filename Path to the file to read from
*/
ProtoInputStream(const std::string& filename);
/**
* Destruct the input stream, and also close the underlying file
* streams and coded streams.
*/
~ProtoInputStream();
/**
* Read a message from the stream. A message that is found but
* cannot be parsed causes a panic rather than a false return value.
*
* @param msg Message read from the stream
* @return True if a message was read, false if reading fails
*/
bool read(google::protobuf::Message& msg);
/**
* Reset the input stream and seek to the beginning of the file.
*/
void reset();
private:
/**
* Create the internal streams that are wrapping the input file, and
* check the magic number at the start of the data.
*/
void createStreams();
/**
* Destroy the internal streams that are wrapping the input file.
*/
void destroyStreams();
/// Underlying file input stream
std::ifstream fileStream;
/// Hold on to the file name for debug messages
const std::string fileName;
/// Boolean flag to remember whether we use gzip or not, determined
/// once when the file is opened
bool useGzip;
/// Zero Copy stream wrapping the STL input stream
google::protobuf::io::IstreamInputStream* zeroCopyStream;
/// Optional Gzip stream to wrap the Zero Copy stream, NULL if the
/// input is not compressed
google::protobuf::io::GzipInputStream* gzipStream;
/// Top-level coded stream that messages are read from
google::protobuf::io::CodedInputStream* codedStream;
};
#endif //__PROTO_PROTOIO_HH #endif //__PROTO_PROTOIO_HH