From 1c0cf32b2246a0792f8e9b24dfd8f76ee5b364cd Mon Sep 17 00:00:00 2001
From: Chris Hennes <chennes@gmail.com>
Date: Thu, 30 Mar 2023 21:19:58 -0500
Subject: [PATCH] App/Toponaming: Add supporting code

Support for reading and writing binary data, and a small tweak to bitmask.
---
 src/Base/Bitmask.h  |  3 ++
 src/Base/Reader.cpp | 81 +++++++++++++++++++++++++++++++++++++++++++++
 src/Base/Reader.h   | 26 +++++++++++++++
 src/Base/Writer.cpp | 64 +++++++++++++++++++++++++++++++++++
 src/Base/Writer.h   | 21 ++++++++++++
 5 files changed, 195 insertions(+)
diff --git a/src/Base/Bitmask.h b/src/Base/Bitmask.h
index b1aaa10b89..13b2f40352 100644
--- a/src/Base/Bitmask.h
+++ b/src/Base/Bitmask.h
@@ -123,6 +123,9 @@ public:
         using u = typename std::underlying_type<Enum>::type;
         return static_cast<u>(i) == static_cast<u>(f.i);
     }
+    typename std::underlying_type<Enum>::type toUnderlyingType() const { // stored value `i` as the enum's underlying type
+        return static_cast<typename std::underlying_type<Enum>::type>(i);
+    }
 };
 }
 
diff --git a/src/Base/Reader.cpp b/src/Base/Reader.cpp
index 507186db52..3231d26ece 100644
--- a/src/Base/Reader.cpp
+++ b/src/Base/Reader.cpp
@@ -24,6 +24,7 @@
 #include "PreCompiled.h"
 
 #ifndef _PreComp_
+#include <memory>
 # include <xercesc/sax2/XMLReaderFactory.hpp>
 #endif
 
@@ -42,6 +43,7 @@
 #include <zipios++/zipios-config.h>
 #endif
 #include <zipios++/zipinputstream.h>
+#include <boost/iostreams/filtering_stream.hpp>
 
 
 XERCES_CPP_NAMESPACE_USE
@@ -283,6 +285,85 @@ void Base::XMLReader::readCharacters()
 {
 }
 
+std::streamsize Base::XMLReader::read(char_type* s, std::streamsize n)
+{
+    // Device read for boost::iostreams: copy up to n characters from Characters into s.
+    char_type* buf = s; // start of the output buffer, used to compute the returned count
+    if (CharacterOffset < 0) {
+        return -1; // negative offset: nothing to stream
+    }
+
+    for (;;) {
+        std::streamsize copy_size =
+            static_cast<std::streamsize>(Characters.size()) - CharacterOffset;
+        if (n < copy_size) {
+            copy_size = n;
+        }
+        std::memcpy(s, Characters.c_str() + CharacterOffset, copy_size);
+        n -= copy_size;
+        s += copy_size;
+        CharacterOffset += copy_size;
+
+        if (!n) {
+            break; // request fully satisfied
+        }
+
+        if (ReadType == Chars) {
+            read(); // NOTE(review): presumably parses the next token and extends Characters - confirm
+        }
+        else {
+            CharacterOffset = -1; // current token is not character data; later calls return -1
+            break;
+        }
+    }
+
+    return s - buf; // number of characters copied by this call
+}
+
+void Base::XMLReader::endCharStream()
+{
+    CharacterOffset = -1; // read(char_type*, std::streamsize) returns -1 from now on
+    CharStream.reset();   // destroy the stream created by beginCharStream(), if any
+}
+
+std::istream& Base::XMLReader::charStream()
+{
+    if (!CharStream) { // set by beginCharStream(), cleared by endCharStream()
+        throw Base::XMLParseException("no current character stream");
+    }
+    return *CharStream;
+}
+
+std::istream& Base::XMLReader::beginCharStream()
+{
+    if (CharStream) { // only one character stream may be active at a time
+        throw Base::XMLParseException("recursive character stream");
+    }
+
+    // TODO: An XML element can actually contain a mix of child elements and
+    // characters. So we should not actually demand 'StartElement' here. But
+    // with the current implementation of character stream, we cannot track
+    // child elements and character content at the same time.
+    if (ReadType == StartElement) {
+        CharacterOffset = 0;
+        read();
+    }
+    else if (ReadType == StartEndElement) {
+        // If we are currently at a self-closing element, just leave the offset
+        // as negative and do not read any characters. This will result in an
+        // empty input stream for the caller.
+        CharacterOffset = -1;
+    }
+    else {
+        throw Base::XMLParseException("invalid state while reading character stream");
+    }
+    // Build the typed stream first so no downcast is needed; publish it once complete.
+    auto filteringStream = std::make_unique<boost::iostreams::filtering_istream>();
+    filteringStream->push(boost::ref(*this));
+    CharStream = std::move(filteringStream);
+    return *CharStream;
+}
+
 void Base::XMLReader::readBinFile(const char* filename)
 {
     Base::FileInfo fi(filename);
diff --git a/src/Base/Reader.h b/src/Base/Reader.h
index d06617a95f..aa66f5390a 100644
--- a/src/Base/Reader.h
+++ b/src/Base/Reader.h
@@ -33,6 +33,8 @@
 #include <xercesc/sax2/Attributes.hpp>
 #include <xercesc/sax2/DefaultHandler.hpp>
 
+#include <boost/iostreams/concepts.hpp>
+
 #include "FileInfo.h"
 
 
@@ -127,6 +129,13 @@ public:
     XMLReader(const char* FileName, std::istream&);
     ~XMLReader() override;
 
+    /** @name boost iostream device interface */
+    //@{
+    using category = boost::iostreams::source_tag;
+    using char_type = char;
+    std::streamsize read(char_type* s, std::streamsize n);
+    //@}
+
     bool isValid() const { return _valid; }
     bool isVerbose() const { return _verbose; }
     void setVerbose(bool on) { _verbose = on; }
@@ -157,6 +166,20 @@ public:
     void readEndElement(const char* ElementName=nullptr, int level=-1);
     /// read until characters are found
     void readCharacters();
+
+    /** Obtain an input stream for reading characters
+     *
+     *  @return Returns an input stream for reading characters. The stream is
+     *  destroyed automatically when you call readElement() or readEndElement(),
+     *  or you can end it explicitly with endCharStream().
+     */
+    std::istream &beginCharStream();
+    /// Manually end the current character stream
+    void endCharStream();
+    /// Obtain the current character stream.
+    /// @throws Base::XMLParseException if no character stream is active
+    std::istream &charStream();
+
     /// read binary file
     void readBinFile(const char*);
     //@}
@@ -259,6 +282,7 @@ protected:
     std::string LocalName;
     std::string Characters;
     unsigned int CharacterCount;
+    std::streamsize CharacterOffset{-1};
 
     std::map<std::string,std::string> AttrMap;
     using AttrMapType = std::map<std::string,std::string>;
@@ -285,6 +309,8 @@ protected:
     std::vector<std::string> FileNames;
 
     std::bitset<32> StatusBits;
+
+    std::unique_ptr<std::istream> CharStream;
 };
 
 class BaseExport Reader : public std::istream
diff --git a/src/Base/Writer.cpp b/src/Base/Writer.cpp
index c93ec383bc..b1f9df92e2 100644
--- a/src/Base/Writer.cpp
+++ b/src/Base/Writer.cpp
@@ -34,11 +34,43 @@
 #include "Stream.h"
 #include "Tools.h"
 
+#include <boost/iostreams/filtering_stream.hpp>
+#include <memory>
 
 using namespace Base;
 using namespace std;
 using namespace zipios;
 
+// boost iostream filter to escape ']]>' in text saved into a CDATA section:
+// each ']]>' is written as ']]]]><![CDATA[>'. It does not check for valid utf8.
+struct cdata_filter {
+    typedef char char_type;
+    typedef boost::iostreams::output_filter_tag category;
+    // Forward c to dev; when c completes ']]>', first close and reopen the CDATA section.
+    template<typename Device>
+    inline bool put(Device& dev, char c) {
+        switch(state) {
+            case 0:
+            case 1:
+                if(c == ']')
+                    ++state;
+                else
+                    state = 0;
+                break;
+            case 2:
+                if(c == '>') {
+                    static const char escape[] = "]]><![CDATA[";
+                    boost::iostreams::write(dev,escape,sizeof(escape)-1);
+                }
+                if(c != ']') // stay armed on longer runs such as "]]]>"
+                    state = 0;
+                break;
+        }
+        return boost::iostreams::put(dev,c);
+    }
+    // Number of consecutive ']' written immediately before the next character, capped at 2.
+    int state = 0;
+};
 
 // ---------------------------------------------------------------------------
 //  Writer: Constructors and Destructor
@@ -55,6 +87,38 @@ Writer::Writer()
 
 Writer::~Writer() = default;
 
+std::ostream &Writer::beginCharStream() {
+    if(CharStream) { // only one character stream may be open at a time
+        throw Base::RuntimeError("Writer::beginCharStream(): invalid state");
+    }
+    // Open the CDATA section, then route character output through cdata_filter into Stream().
+    Stream() << "<![CDATA[";
+    auto f = std::make_unique<boost::iostreams::filtering_ostream>();
+    f->push(cdata_filter());
+    f->push(Stream());
+    *f << std::setprecision(std::numeric_limits<double>::digits10 + 1);
+    CharStream = std::move(f); // publish only the fully configured stream
+    return *CharStream;
+}
+
+std::ostream &Writer::endCharStream() {
+    if(!CharStream)
+        return Stream(); // nothing open: leave the output untouched
+    CharStream.reset(); // destroy the filtering stream before writing to Stream() directly
+    return Stream() << "]]>"; // terminate the CDATA section opened by beginCharStream()
+}
+
+std::ostream &Writer::charStream() {
+    if(!CharStream) // set by beginCharStream(), cleared by endCharStream()
+        throw Base::RuntimeError("Writer::charStream(): no current character stream");
+    return *CharStream;
+}
+
+void Writer::insertText(const std::string &s) {
+    beginCharStream() << s; // throws Base::RuntimeError if a character stream is already open
+    endCharStream();
+}
+
 void Writer::insertAsciiFile(const char* FileName)
 {
     Base::FileInfo fi(FileName);
diff --git a/src/Base/Writer.h b/src/Base/Writer.h
index 24771cf123..3b71bb1d42 100644
--- a/src/Base/Writer.h
+++ b/src/Base/Writer.h
@@ -72,6 +72,8 @@ public:
     void insertAsciiFile(const char* FileName);
     /// insert a binary file BASE64 coded as CDATA section in the XML file
     void insertBinFile(const char* FileName);
+    /// insert text string as CDATA
+    void insertText(const std::string &s);
 
     /** @name additional file writing */
     //@{
@@ -115,6 +117,23 @@ public:
 
     virtual std::ostream &Stream()=0;
 
+    /** Create an output stream for storing character content.
+     * The input is assumed to consist of valid characters in
+     * the current XML encoding, and will be enclosed inside a
+     * CDATA section.  The stream scans the input and
+     * properly escapes any CDATA end marker (']]>') inside it.
+     * @return Returns an output stream.
+     *
+     * You must call endCharStream() to end the current character stream.
+     */
+    std::ostream &beginCharStream();
+    /** End the current character output stream
+     * @return Returns the normal writer stream for convenience
+     */
+    std::ostream &endCharStream();
+    /// Return the current character output stream
+    std::ostream &charStream();
+
     /// name for underlying file saves
     std::string ObjectName;
 
@@ -138,6 +157,8 @@ protected:
 private:
     Writer(const Writer&);
     Writer& operator=(const Writer&);
+
+    std::unique_ptr<std::ostream> CharStream;
 };