From 1c0cf32b2246a0792f8e9b24dfd8f76ee5b364cd Mon Sep 17 00:00:00 2001 From: Chris Hennes Date: Thu, 30 Mar 2023 21:19:58 -0500 Subject: [PATCH] App/Toponaming: Add supporting code Support for reading and writing binary data, and a small tweak to bitmask. --- src/Base/Bitmask.h | 3 ++ src/Base/Reader.cpp | 81 +++++++++++++++++++++++++++++++++++++++++++++ src/Base/Reader.h | 26 +++++++++++++++ src/Base/Writer.cpp | 64 +++++++++++++++++++++++++++++++++++ src/Base/Writer.h | 21 ++++++++++++ 5 files changed, 195 insertions(+) diff --git a/src/Base/Bitmask.h b/src/Base/Bitmask.h index b1aaa10b89..13b2f40352 100644 --- a/src/Base/Bitmask.h +++ b/src/Base/Bitmask.h @@ -123,6 +123,9 @@ public: using u = typename std::underlying_type::type; return static_cast(i) == static_cast(f.i); } + typename std::underlying_type::type toUnderlyingType() const { + return static_cast::type>(i); + } }; } diff --git a/src/Base/Reader.cpp b/src/Base/Reader.cpp index 507186db52..3231d26ece 100644 --- a/src/Base/Reader.cpp +++ b/src/Base/Reader.cpp @@ -24,6 +24,7 @@ #include "PreCompiled.h" #ifndef _PreComp_ +#include # include #endif @@ -42,6 +43,7 @@ #include #endif #include +#include XERCES_CPP_NAMESPACE_USE @@ -283,6 +285,85 @@ void Base::XMLReader::readCharacters() { } +std::streamsize Base::XMLReader::read(char_type* s, std::streamsize n) +{ + + char_type* buf = s; + if (CharacterOffset < 0) { + return -1; + } + + for (;;) { + std::streamsize copy_size = + static_cast(Characters.size()) - CharacterOffset; + if (n < copy_size) { + copy_size = n; + } + std::memcpy(s, Characters.c_str() + CharacterOffset, copy_size); + n -= copy_size; + s += copy_size; + CharacterOffset += copy_size; + + if (!n) { + break; + } + + if (ReadType == Chars) { + read(); + } + else { + CharacterOffset = -1; + break; + } + } + + return s - buf; +} + +void Base::XMLReader::endCharStream() +{ + CharacterOffset = -1; + CharStream.reset(); +} + +std::istream& Base::XMLReader::charStream() +{ + if (!CharStream) { + throw Base::XMLParseException("no current character stream"); + } + return *CharStream; +} + +std::istream& Base::XMLReader::beginCharStream() +{ + if (CharStream) { + throw Base::XMLParseException("recursive character stream"); + } + + // TODO: An XML element can actually contain a mix of child elements and + // characters. So we should not actually demand 'StartElement' here. But + // with the current implementation of character stream, we cannot track + // child elements and character content at the same time. + if (ReadType == StartElement) { + CharacterOffset = 0; + read(); + } + else if (ReadType == StartEndElement) { + // If we are currently at a self-closing element, just leave the offset + // as negative and do not read any characters. This will result in an + // empty input stream for the caller. + CharacterOffset = -1; + } + else { + throw Base::XMLParseException("invalid state while reading character stream"); + } + + CharStream = std::make_unique(); + auto* filteringStream = dynamic_cast(CharStream.get()); + filteringStream->push(boost::ref(*this)); + return *CharStream; +} + void Base::XMLReader::readBinFile(const char* filename) { Base::FileInfo fi(filename); diff --git a/src/Base/Reader.h b/src/Base/Reader.h index d06617a95f..aa66f5390a 100644 --- a/src/Base/Reader.h +++ b/src/Base/Reader.h @@ -33,6 +33,8 @@ #include #include +#include + #include "FileInfo.h" @@ -127,6 +129,13 @@ public: XMLReader(const char* FileName, std::istream&); ~XMLReader() override; + /** @name boost iostream device interface */ + //@{ + using category = boost::iostreams::source_tag; + using char_type = char; + std::streamsize read(char_type* s, std::streamsize n); + //@} + bool isValid() const { return _valid; } bool isVerbose() const { return _verbose; } void setVerbose(bool on) { _verbose = on; } @@ -157,6 +166,20 @@ public: void readEndElement(const char* ElementName=nullptr, int level=-1); /// read until characters are found void readCharacters(); + + /** Obtain an input stream for reading characters + * + * @return Return a input stream for reading characters. The stream will be + * auto destroyed when you call with readElement() or readEndElement(), or + * you can end it explicitly with endCharStream(). + */ + std::istream &beginCharStream(); + /// Manually end the current character stream + void endCharStream(); + /// Obtain the current character stream + std::istream &charStream(); + //@} + /// read binary file void readBinFile(const char*); //@} @@ -259,6 +282,7 @@ protected: std::string LocalName; std::string Characters; unsigned int CharacterCount; + std::streamsize CharacterOffset{-1}; std::map AttrMap; using AttrMapType = std::map; @@ -285,6 +309,8 @@ protected: std::vector FileNames; std::bitset<32> StatusBits; + + std::unique_ptr CharStream; }; class BaseExport Reader : public std::istream diff --git a/src/Base/Writer.cpp b/src/Base/Writer.cpp index c93ec383bc..b1f9df92e2 100644 --- a/src/Base/Writer.cpp +++ b/src/Base/Writer.cpp @@ -34,11 +34,43 @@ #include "Stream.h" #include "Tools.h" +#include +#include using namespace Base; using namespace std; using namespace zipios; +// boost iostream filter to escape ']]>' in text file saved into CDATA section. +// It does not check if the character is valid utf8 or not. +struct cdata_filter { + + typedef char char_type; + typedef boost::iostreams::output_filter_tag category; + + template + inline bool put(Device& dev, char c) { + switch(state) { + case 0: + case 1: + if(c == ']') + ++state; + else + state = 0; + break; + case 2: + if(c == '>') { + static const char escape[] = "]]>(); + auto f = dynamic_cast(CharStream.get()); + f->push(cdata_filter()); + f->push(Stream()); + *f << std::setprecision(std::numeric_limits::digits10 + 1); + return *CharStream; +} + +std::ostream &Writer::endCharStream() { + if(CharStream) { + CharStream.reset(); + } + return Stream(); +} + +std::ostream &Writer::charStream() { + if(!CharStream) + throw Base::RuntimeError("Writer::endCharStream(): no current character stream"); + return *CharStream; +} + +void Writer::insertText(const std::string &s) { + beginCharStream() << s; + endCharStream(); +} + void Writer::insertAsciiFile(const char* FileName) { Base::FileInfo fi(FileName); diff --git a/src/Base/Writer.h b/src/Base/Writer.h index 24771cf123..3b71bb1d42 100644 --- a/src/Base/Writer.h +++ b/src/Base/Writer.h @@ -72,6 +72,8 @@ public: void insertAsciiFile(const char* FileName); /// insert a binary file BASE64 coded as CDATA section in the XML file void insertBinFile(const char* FileName); + /// insert text string as CDATA + void insertText(const std::string &s); /** @name additional file writing */ //@{ @@ -115,6 +117,23 @@ public: virtual std::ostream &Stream()=0; + /** Create an output stream for storing character content + * The input is assumed to be valid character with + * the current XML encoding, and will be enclosed inside + * CDATA section. The stream will scan the input and + * properly escape any CDATA ending inside. + * @return Returns an output stream. + * + * You must call endCharStream() to end the current character stream. + */ + std::ostream &beginCharStream(); + /** End the current character output stream + * @return Returns the normal writer stream for convenience + */ + std::ostream &endCharStream(); + /// Return the current character output stream + std::ostream &charStream(); + /// name for underlying file saves std::string ObjectName; @@ -138,6 +157,8 @@ protected: private: Writer(const Writer&); Writer& operator=(const Writer&); + + std::unique_ptr CharStream; };