Merge pull request #9148 from chennes/toponamingStringHasher

App/Toponaming: String hasher
2023-05-15 20:14:43 -05:00
parent 6b1dcf655e 41a924ccec
commit fd78b3d5d0
19 changed files with 4227 additions and 4 deletions
--- a/src/App/CMakeLists.txt
+++ b/src/App/CMakeLists.txt
@@ -90,6 +90,8 @@ generate_from_xml(GeoFeatureGroupExtensionPy)
 generate_from_xml(MetadataPy)
 generate_from_xml(OriginGroupExtensionPy)
 generate_from_xml(PartPy)
+generate_from_xml(StringHasherPy)
+generate_from_xml(StringIDPy)

 generate_from_xml(ComplexGeoDataPy)
 generate_from_xml(PropertyContainerPy)
@@ -115,6 +117,8 @@ SET(FreeCADApp_XML_SRCS
    PropertyContainerPy.xml
    ComplexGeoDataPy.xml
    MaterialPy.xml
+    StringHasherPy.xml
+    StringIDPy.xml
 )
 SOURCE_GROUP("XML" FILES ${FreeCADApp_XML_SRCS})

@@ -270,6 +274,9 @@ SET(FreeCADApp_CPP_SRCS
    MaterialPyImp.cpp
    Metadata.cpp
    MetadataPyImp.cpp
+    StringHasher.cpp
+    StringHasherPyImp.cpp
+    StringIDPyImp.cpp
 )

 SET(FreeCADApp_HPP_SRCS
@@ -288,6 +295,7 @@ SET(FreeCADApp_HPP_SRCS
    MappedElement.h
    Material.h
    Metadata.h
+    StringHasher.h
 )

 SET(FreeCADApp_SRCS
--- a/src/App/StringHasher.cpp
+++ b/src/App/StringHasher.cpp
@@ -0,0 +1,869 @@
+// SPDX-License-Identifier: LGPL-2.1-or-later
+
+/***************************************************************************************************
+ *                                                                                                 *
+ *   Copyright (c) 2022 Zheng, Lei (realthunder) <realthunder.dev@gmail.com>                       *
+ *   Copyright (c) 2023 FreeCAD Project Association                                                *
+ *                                                                                                 *
+ *   This file is part of FreeCAD.                                                                 *
+ *                                                                                                 *
+ *   FreeCAD is free software: you can redistribute it and/or modify it under the terms of the     *
+ *   GNU Lesser General Public License as published by the Free Software Foundation, either        *
+ *   version 2.1 of the License, or (at your option) any later version.                            *
+ *                                                                                                 *
+ *   FreeCAD is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;          *
+ *   without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.     *
+ *   See the GNU Lesser General Public License for more details.                                   *
+ *                                                                                                 *
+ *   You should have received a copy of the GNU Lesser General Public License along with           *
+ *   FreeCAD. If not, see <https://www.gnu.org/licenses/>.                                         *
+ *                                                                                                 *
+ **************************************************************************************************/
+
+#include "PreCompiled.h"
+
+#include <QCryptographicHash>
+#include <QHash>
+#include <deque>
+
+#include <Base/Console.h>
+#include <Base/Reader.h>
+#include <Base/Stream.h>
+#include <Base/Writer.h>
+
+#include <boost/algorithm/string/classification.hpp>
+#include <boost/algorithm/string/split.hpp>
+#include <boost/bimap.hpp>
+#include <boost/bimap/set_of.hpp>
+#include <boost/bimap/unordered_set_of.hpp>
+#include <boost/iostreams/stream.hpp>
+
+#include "MappedElement.h"
+#include "StringHasher.h"
+#include "StringHasherPy.h"
+#include "StringIDPy.h"
+
+
+FC_LOG_LEVEL_INIT("App", true, true)
+
+namespace bio = boost::iostreams;
+using namespace App;
+
+///////////////////////////////////////////////////////////
+
+// Combined hash / equality functor for StringID pointers. Both operations only look at the
+// data() and postfix() of the pointed-to StringID, so distinct objects with the same content are
+// treated as the same key.
+struct StringIDHasher
+{
+    // Hash functor: a null pointer hashes to 0; otherwise the data is hashed using the hash of
+    // the postfix as seed.
+    std::size_t operator()(const StringID* sid) const
+    {
+        if (sid == nullptr) {
+            return 0;
+        }
+        const auto seed = qHash(sid->postfix());
+        return qHash(sid->data(), seed);
+    }
+
+    // Equality functor: identical pointers are equal, a single null pointer is never equal to
+    // anything else, and otherwise both data and postfix must match.
+    bool operator()(const StringID* IDa, const StringID* IDb) const
+    {
+        if (IDa == IDb) {
+            return true;
+        }
+        if (IDa == nullptr || IDb == nullptr) {
+            return false;
+        }
+        if (!(IDa->data() == IDb->data())) {
+            return false;
+        }
+        return IDa->postfix() == IDb->postfix();
+    }
+};
+
+// Bidirectional table used by StringHasher: the left side is an unordered set of StringID
+// pointers (hashed and compared through StringIDHasher), the right side is an ordered set of
+// long integer IDs.
+using HashMapBase =
+    boost::bimap<boost::bimaps::unordered_set_of<StringID*, StringIDHasher, StringIDHasher>,
+                 boost::bimaps::set_of<long>>;
+
+// The concrete table type, extended with the two settings StringHasher stores alongside it.
+class StringHasher::HashMap: public HashMapBase
+{
+public:
+    // When true, compact() does nothing and Save()/saveStream() write every entry.
+    bool SaveAll = false;
+    // Data longer than this many bytes is replaced by its hash in getID() when the caller marks
+    // it hashable; values <= 0 disable hashing.
+    int Threshold = 0;
+};
+
+///////////////////////////////////////////////////////////
+
+TYPESYSTEM_SOURCE_ABSTRACT(App::StringID, Base::BaseClass)
+
+// If this StringID is still attached to a hasher, remove its entry (looked up by ID) from that
+// hasher's table.
+StringID::~StringID()
+{
+    if (!_hasher) {
+        return;
+    }
+    _hasher->_hashes->right.erase(_id);
+}
+
+// Create a new Python wrapper object for this StringID.
+PyObject* StringID::getPyObject()
+{
+    auto* wrapper = new StringIDPy(this);
+    return wrapper;
+}
+
+// Create a new Python wrapper object for this StringID and store the given index in it.
+PyObject* StringID::getPyObjectWithIndex(int index)
+{
+    auto* wrapper = new StringIDPy(this);
+    wrapper->_index = index;
+    return wrapper;
+}
+
+// Format this ID as "#<value>" in hexadecimal, followed by ":<index>" when index is non-zero.
+// The stream stays in hex mode, so the index is written in hexadecimal as well.
+std::string StringID::toString(int index) const
+{
+    std::ostringstream out;
+    out << '#' << std::hex << value();
+    if (index == 0) {
+        return out.str();
+    }
+    out << ':' << index;
+    return out.str();
+}
+
+// Parse text of the form "#<hex id>[:<hex index>]".
+//
+// name: the text to parse; a null pointer yields an id of -1.
+// eof:  when true, the parse must have reached the end of the input, otherwise the id is -1.
+// size: number of characters to read from name; a negative value means "use strlen(name)".
+//
+// Returns the parsed id and index. The id is set to -1 when the text does not start with '#',
+// when the character following the id is neither absent nor ':', or when the eof requirement
+// is not met.
+StringID::IndexID StringID::fromString(const char* name, bool eof, int size)
+{
+    IndexID parsed {};
+    parsed.id = 0;
+    parsed.index = 0;
+    if (name == nullptr) {
+        parsed.id = -1;
+        return parsed;
+    }
+
+    const int length = size < 0 ? static_cast<int>(std::strlen(name)) : size;
+    bio::stream<bio::array_source> input(name, length);
+
+    char hashMark = 0;
+    char separator = 0;
+    input >> hashMark >> std::hex >> parsed.id >> separator >> parsed.index;
+
+    const bool unreadTail = eof && !input.eof();
+    const bool badPrefix = hashMark != '#';
+    const bool badSeparator = separator != 0 && separator != ':';
+    if (unreadTail || badPrefix || badSeparator) {
+        parsed.id = -1;
+    }
+    return parsed;
+}
+
+// Return a textual form of the stored data. Hashed or binary data is returned base64-encoded
+// (index and postfix are not appended in that case). Otherwise the result is the data,
+// followed by the decimal index when it is non-zero, followed by the postfix when there is one.
+std::string StringID::dataToText(int index) const
+{
+    const bool needsEncoding = isHashed() || isBinary();
+    if (needsEncoding) {
+        return _data.toBase64().constData();
+    }
+
+    std::string text(_data.constData());
+    if (index != 0) {
+        text.append(std::to_string(index));
+    }
+    if (_postfix.size() != 0) {
+        text.append(_postfix.constData());
+    }
+    return text;
+}
+
+// Set the Marked flag on this StringID and, recursively, on every related StringID in _sids.
+// An already marked StringID is left alone, which also stops the recursion.
+void StringID::mark() const
+{
+    if (!isMarked()) {
+        _flags.setFlag(Flag::Marked);
+        for (auto& related : _sids) {
+            related.deref().mark();
+        }
+    }
+}
+
+///////////////////////////////////////////////////////////
+
+TYPESYSTEM_SOURCE(App::StringHasher, Base::Persistence)
+
+// Construct a hasher with a fresh, empty table (SaveAll off, Threshold 0).
+StringHasher::StringHasher()
+    : _hashes(new HashMap)
+{}
+
+// Detach and release every StringID still held in the table (see clear()).
+StringHasher::~StringHasher()
+{
+    clear();
+}
+
+// Change the SaveAll setting. When the value actually changes, compact() is run afterwards
+// (which is a no-op if SaveAll has just been switched on).
+void StringHasher::setSaveAll(bool enable)
+{
+    if (_hashes->SaveAll != enable) {
+        _hashes->SaveAll = enable;
+        compact();
+    }
+}
+
+// Remove table entries that nothing but the hasher itself references. Does nothing when SaveAll
+// is set. Entries flagged persistent are never removed. Removing an entry may leave some of its
+// related StringIDs unreferenced as well; those are queued and removed in turn.
+void StringHasher::compact()
+{
+    if (_hashes->SaveAll) {
+        return;
+    }
+
+    // Make a list of all the table entries that have only a single reference and are not marked
+    // "persistent"
+    std::deque<StringIDRef> pendings;
+    for (auto& hasher : _hashes->right) {
+        if (!hasher.second->isPersistent() && hasher.second->getRefCount() == 1) {
+            pendings.emplace_back(hasher.second);
+        }
+    }
+
+    // Work through the queue, removing the unused StringIDs and queueing any related StringIDs
+    // that become unused as a result
+    while (!pendings.empty()) {
+        StringIDRef sid = pendings.front();
+        pendings.pop_front();
+        // Try to erase the map entry for this StringID
+        if (_hashes->right.erase(sid.value()) == 0U) {
+            continue;// If nothing was erased, there's nothing more to do
+        }
+        // Detach the StringID from this hasher and drop the reference taken in insert()
+        sid._sid->_hasher = nullptr;
+        sid._sid->unref();
+        for (auto& hasher : sid._sid->_sids) {
+            if (hasher._sid->_hasher == this && !hasher._sid->isPersistent()
+                && hasher._sid->getRefCount() == 2) {
+                // If the related StringID also uses this hasher, is not marked persistent, and has
+                // a current reference count of 2 (which will be its hasher reference and its entry
+                // in the related SIDs list), then prep it for removal as well.
+                pendings.push_back(hasher);
+            }
+        }
+    }
+}
+
+// Return the current SaveAll setting.
+bool StringHasher::getSaveAll() const
+{
+    return _hashes->SaveAll;
+}
+
+// Set the data size (in bytes) above which hashable data is stored as a hash by getID().
+// A value <= 0 disables hashing.
+void StringHasher::setThreshold(int threshold)
+{
+    _hashes->Threshold = threshold;
+}
+
+// Return the current hashing threshold.
+int StringHasher::getThreshold() const
+{
+    return _hashes->Threshold;
+}
+
+// Return the largest ID currently in the table, or 0 when the table is empty. The ID side of
+// the table is ordered, so the largest ID is its last element.
+long StringHasher::lastID() const
+{
+    const auto& idView = _hashes->right;
+    if (!idView.empty()) {
+        auto tail = idView.end();
+        --tail;
+        return tail->first;
+    }
+    return 0;
+}
+
+// Look up (or create) the StringID for a character buffer.
+//
+// text:     the characters to look up. The buffer is wrapped without copying for the lookup.
+// len:      number of bytes in text; a negative value means "use strlen(text)".
+// hashable: when true the data may be stored as a hash, subject to the threshold.
+//
+// Returns a null StringIDRef when text is a null pointer and len is non-zero, because there is
+// then no valid buffer to measure or read.
+StringIDRef StringHasher::getID(const char* text, int len, bool hashable)
+{
+    if (!text && len != 0) {
+        return {};
+    }
+    if (len < 0) {
+        len = static_cast<int>(strlen(text));
+    }
+    return getID(QByteArray::fromRawData(text, len), hashable ? Option::Hashable : Option::None);
+}
+
+// Look up (or create) the StringID for a byte array.
+//
+// The data is replaced by its SHA-1 digest when the Hashable option is given, the threshold is
+// positive and the data is longer than the threshold. If the table already holds an entry for
+// the resulting key, that entry is returned. Otherwise a new StringID with ID lastID() + 1 is
+// created, flagged Binary and/or Hashed as applicable, and inserted. Unless the data was hashed
+// or the NoCopy option was given, the new entry stores its own deep copy of the data.
+StringIDRef StringHasher::getID(const QByteArray& data, Options options)
+{
+    const bool wantBinary = options.testFlag(Option::Binary);
+    const bool wantCopy = !options.testFlag(Option::NoCopy);
+    const bool hashed = options.testFlag(Option::Hashable) && _hashes->Threshold > 0
+        && static_cast<int>(data.size()) > _hashes->Threshold;
+
+    // Temporary key used only for the table lookup
+    StringID key;
+    key._data = hashed ? QCryptographicHash::hash(data, QCryptographicHash::Sha1) : data;
+
+    auto found = _hashes->left.find(&key);
+    if (found != _hashes->left.end()) {
+        return {found->first};
+    }
+
+    if (wantCopy && !hashed) {
+        // Unhashed data is deep-copied so the new entry does not alias the caller's buffer
+        key._data = QByteArray(data.constData(), data.size());
+    }
+
+    StringID::Flags flags(StringID::Flag::None);
+    if (wantBinary) {
+        flags.setFlag(StringID::Flag::Binary);
+    }
+    if (hashed) {
+        flags.setFlag(StringID::Flag::Hashed);
+    }
+
+    StringIDRef created(new StringID(lastID() + 1, key._data, flags));
+    return {insert(created)};
+}
+
+// Look up (or create) the StringID for a mapped element name.
+//
+// name: the mapped name; its data bytes and postfix bytes form the lookup key.
+// sids: StringIDs related to the name. Only those that belong to this hasher are stored with a
+//       newly created entry.
+//
+// If an entry with the same data and postfix already exists it is returned (carrying the index
+// when the data was recognised as an IndexedName). Otherwise a new StringID with ID
+// lastID() + 1 is built: a postfix that contains no '#' is itself registered through getID()
+// and replaced by the text form of its reference, the base part of an IndexedName is registered
+// as well, and both references are placed at the front of the new entry's related-ID list.
+// Finally the data is checked for a leading "#id[:index]" reference so the matching related ID
+// can be moved to the front and flagged, and the new entry is inserted.
+StringIDRef StringHasher::getID(const Data::MappedName& name, const QVector<StringIDRef>& sids)
+{
+    StringID tempID;
+    tempID._postfix = name.postfixBytes();
+
+    Data::IndexedName indexed;
+    if (tempID._postfix.size() != 0) {
+        // Only check for IndexedName if there is postfix, because of the way
+        // we restore the StringID. See StringHasher::saveStream/restoreStreamNew()
+        indexed = Data::IndexedName(name.dataBytes());
+    }
+    if (indexed) {
+        // If this is an IndexedName, then _data only stores the base part of the name, without the
+        // integer index
+        tempID._data =
+            QByteArray::fromRawData(indexed.getType(), static_cast<int>(strlen(indexed.getType())));
+    }
+    else {
+        // Store the entire name in _data, but temporarily re-use the existing memory
+        tempID._data = name.dataBytes();
+    }
+
+    // Check to see if there is already an entry in the hash table for this StringID
+    auto it = _hashes->left.find(&tempID);
+    if (it != _hashes->left.end()) {
+        auto res = StringIDRef(it->first);
+        if (indexed) {
+            res._index = indexed.getIndex();
+        }
+        return res;
+    }
+
+    if (!indexed && name.isRaw()) {
+        // Make a copy of the memory if we didn't do so earlier
+        tempID._data = QByteArray(name.dataBytes().constData(), name.dataBytes().size());
+    }
+
+    // If the postfix is not already encoded, use getID to encode it:
+    StringIDRef postfixRef;
+    if ((tempID._postfix.size() != 0) && tempID._postfix.indexOf("#") < 0) {
+        postfixRef = getID(tempID._postfix);
+        postfixRef.toBytes(tempID._postfix);
+    }
+
+    // If _data is an IndexedName, use getID to encode it:
+    StringIDRef indexRef;
+    if (indexed) {
+        indexRef = getID(tempID._data);
+    }
+
+    // The real StringID object that we are going to insert
+    StringIDRef newStringIDRef(new StringID(lastID() + 1, tempID._data));
+    StringID& newStringID = *newStringIDRef._sid;
+    if (tempID._postfix.size() != 0) {
+        newStringID._flags.setFlag(StringID::Flag::Postfixed);
+        newStringID._postfix = tempID._postfix;
+    }
+
+    // Count the related SIDs that use this hasher
+    int numSIDs = 0;
+    for (const auto& relatedID : sids) {
+        if (relatedID && relatedID._sid->_hasher == this) {
+            ++numSIDs;
+        }
+    }
+
+    int numAddedSIDs = (postfixRef ? 1 : 0) + (indexRef ? 1 : 0);
+    if (numSIDs == sids.size() && !postfixRef && !indexRef) {
+        // The simplest case: just copy the whole list
+        newStringID._sids = sids;
+    }
+    else {
+        // Put the added SIDs at the front of the SID list
+        newStringID._sids.reserve(numSIDs + numAddedSIDs);
+        if (postfixRef) {
+            newStringID._flags.setFlag(StringID::Flag::PostfixEncoded);
+            newStringID._sids.push_back(postfixRef);
+        }
+        if (indexRef) {
+            newStringID._flags.setFlag(StringID::Flag::Indexed);
+            newStringID._sids.push_back(indexRef);
+        }
+        // Append the sids from the input list whose hasher is this one
+        for (const auto& relatedID : sids) {
+            if (relatedID && relatedID._sid->_hasher == this) {
+                newStringID._sids.push_back(relatedID);
+            }
+        }
+    }
+
+    // If the number of related IDs is larger than some threshold (hardcoded to 10 right now), then
+    // remove any duplicates (ignoring the new SIDs we may have just added)
+    const int relatedIDSizeThreshold {10};
+    if (newStringID._sids.size() > relatedIDSizeThreshold) {
+        std::sort(newStringID._sids.begin() + numAddedSIDs, newStringID._sids.end());
+        newStringID._sids.erase(
+            std::unique(newStringID._sids.begin() + numAddedSIDs, newStringID._sids.end()),
+            newStringID._sids.end());
+    }
+
+    // If the new StringID has a postfix, but is not indexed, see if the data string itself
+    // contains an index.
+    if ((newStringID._postfix.size() != 0) && !indexed) {
+        // Use the fromString function to parse the new StringID's data field for a possible index
+        StringID::IndexID res = StringID::fromString(newStringID._data);
+        if (res.id > 0) {// If the data had an index
+            if (res.index != 0) {
+                indexed.setIndex(res.index);
+                // Keep only the "#id:" part of the data; the index is carried separately
+                newStringID._data.resize(newStringID._data.lastIndexOf(':')+1);
+            }
+            // Skip the encoded postfix reference, which always sits in slot 0 when present
+            int offset = newStringID.isPostfixEncoded() ? 1 : 0;
+            // Search for the SID with that index
+            for (int i = offset; i < newStringID._sids.size(); ++i) {
+                if (newStringID._sids[i].value() == res.id) {
+                    if (i != offset) {
+                        // If this SID is not already the first element in sids, move it there by
+                        // swapping it with whatever WAS there
+                        std::swap(newStringID._sids[offset], newStringID._sids[i]);
+                    }
+                    if (res.index != 0) {
+                        newStringID._flags.setFlag(StringID::Flag::PrefixIDIndex);
+                    }
+                    else {
+                        newStringID._flags.setFlag(StringID::Flag::PrefixID);
+                    }
+                    break;
+                }
+            }
+        }
+    }
+
+    return {insert(newStringIDRef), indexed.getIndex()};
+}
+
+// Find the table entry with the given ID and return a reference to it carrying the given
+// index. Returns a null reference when id is not positive or no such entry exists.
+StringIDRef StringHasher::getID(long id, int index) const
+{
+    if (id > 0) {
+        auto found = _hashes->right.find(id);
+        if (found != _hashes->right.end()) {
+            StringIDRef ref(found->second);
+            ref._index = index;
+            return ref;
+        }
+    }
+    return {};
+}
+
+// Set the base name of the separate file Save() writes the table to. A null pointer is treated
+// as the empty string, which makes Save() write the table inline instead.
+void StringHasher::setPersistenceFileName(const char* filename) const
+{
+    _filename = filename ? filename : "";
+}
+
+// Return the file base name set through setPersistenceFileName() (empty when none is set).
+const std::string& StringHasher::getPersistenceFileName() const
+{
+    return _filename;
+}
+
+// Write the hasher to the document XML.
+//
+// A <StringHasher> element carrying the saveall and threshold settings is always written. When
+// there is nothing to save it is closed immediately with count="0". Otherwise it is marked with
+// new="1" and followed by a <StringHasher2> element that either names a separate file (when a
+// persistence file name is set) or holds the table inline as character data.
+void StringHasher::Save(Base::Writer& writer) const
+{
+    // Number of entries that will be written: everything with SaveAll, otherwise only entries
+    // that are marked or persistent
+    size_t count = 0;
+    if (_hashes->SaveAll) {
+        count = _hashes->size();
+    }
+    else {
+        for (auto& entry : _hashes->right) {
+            auto* stringID = entry.second;
+            if (stringID->isMarked() || stringID->isPersistent()) {
+                ++count;
+            }
+        }
+    }
+
+    writer.Stream() << writer.ind() << "<StringHasher saveall=\"" << _hashes->SaveAll
+                    << "\" threshold=\"" << _hashes->Threshold << "\"";
+
+    if (count == 0U) {
+        writer.Stream() << " count=\"0\"></StringHasher>\n";
+        return;
+    }
+
+    writer.Stream() << " count=\"0\" new=\"1\"/>\n";
+
+    writer.Stream() << writer.ind() << "<StringHasher2 ";
+    if (_filename.empty()) {
+        // Inline table: the records go into the element's character data
+        writer.Stream() << " count=\"" << count << "\">\n";
+        saveStream(writer.beginCharStream() << '\n');
+        writer.endCharStream() << '\n';
+        writer.Stream() << writer.ind() << "</StringHasher2>\n";
+        return;
+    }
+
+    // External table: register a document file, written later through SaveDocFile()
+    writer.Stream() << " file=\"" << writer.addFile((_filename + ".txt").c_str(), this)
+                    << "\"/>\n";
+}
+
+// Write the table to a separate document file: a "StringTableStart v1 <count>" header line
+// followed by the records produced by saveStream().
+//
+// The count in the header must equal the number of records saveStream() emits, because the
+// reader uses it to decide how many records to parse. It is therefore computed with the same
+// filter saveStream() applies: every entry with SaveAll, otherwise only entries that are
+// marked or persistent.
+void StringHasher::SaveDocFile(Base::Writer& writer) const
+{
+    std::size_t count = 0;
+    if (_hashes->SaveAll) {
+        count = this->size();
+    }
+    else {
+        for (auto& entry : _hashes->right) {
+            if (entry.second->isMarked() || entry.second->isPersistent()) {
+                ++count;
+            }
+        }
+    }
+    writer.Stream() << "StringTableStart v1 " << count << '\n';
+    saveStream(writer.Stream());
+}
+
+// Write the table records to a text stream, one record per entry, in ascending ID order.
+// Without SaveAll only entries that are marked or persistent are written.
+//
+// Record layout (all numbers in hexadecimal, fields separated by '.'):
+//   <id>.<flags>[.<related id>...] [<data>] [<postfix>]
+// The ID is written either as an absolute value or, prefixed with '-', as the difference to
+// the previously written ID. Related IDs of a relative record are written as differences to
+// the related IDs of the previously written record (or to the record's own ID once the
+// previous record has no more related IDs). The Marked flag is never written.
+//
+// Every record is terminated with a newline. Data of entries without a postfix is written
+// base64-encoded when the entry is hashed or binary, matching what restoreStreamNew() decodes.
+void StringHasher::saveStream(std::ostream& stream) const
+{
+    boost::io::ios_flags_saver ifs(stream);
+    stream << std::hex;
+
+    long anchor = 0;
+    const StringID* last = nullptr;
+    long lastID = 0;
+    bool relative = false;
+
+    for (auto& hasher : _hashes->right) {
+        auto& d = *hasher.second;
+        long id = d._id;
+        if (!_hashes->SaveAll && !d.isMarked() && !d.isPersistent()) {
+            continue;
+        }
+
+        // We use relative coding to save space. But in order to have some
+        // minimum protection against corruption, write an absolute value every
+        // once in a while.
+        relative = (id - anchor) < 1000;
+        if (relative) {
+            stream << '-' << id - lastID;
+        }
+        else {
+            anchor = id;
+            stream << id;
+        }
+        lastID = id;
+
+        // The encoded postfix reference, when present, occupies slot 0 of the related IDs
+        int offset = d.isPostfixEncoded() ? 1 : 0;
+
+        // Only used to verify (in debug builds) that the prefix reference is where the reader
+        // expects to find it
+        StringID::IndexID prefixID {};
+        prefixID.id = 0;
+        prefixID.index = 0;
+        if (d.isPrefixID()) {
+            assert(d._sids.size() > offset);
+            prefixID.id = d._sids[offset].value();
+        }
+        else if (d.isPrefixIDIndex()) {
+            prefixID = StringID::fromString(d._data);
+            assert(d._sids.size() > offset && d._sids[offset].value() == prefixID.id);
+        }
+
+        auto flags = d._flags;
+        flags.setFlag(StringID::Flag::Marked, false);
+        stream << '.' << flags.toUnderlyingType();
+
+        int position = 0;
+        if (!relative) {
+            for (; position < d._sids.size(); ++position) {
+                stream << '.' << d._sids[position].value();
+            }
+        }
+        else {
+            if (last) {
+                for (; position < d._sids.size() && position < last->_sids.size(); ++position) {
+                    long m = last->_sids[position].value();
+                    long n = d._sids[position].value();
+                    if (n < m) {
+                        stream << ".-" << m - n;
+                    }
+                    else {
+                        stream << '.' << n - m;
+                    }
+                }
+            }
+            for (; position < d._sids.size(); ++position) {
+                stream << '.' << id - d._sids[position].value();
+            }
+        }
+
+        last = &d;
+
+        // Having postfix means it is a geometry element name, which
+        // guarantees to be a single line without space. So it is safe to
+        // store in raw stream.
+        if (d.isPostfixed()) {
+            if (!d.isPrefixIDIndex() && !d.isIndexed() && !d.isPrefixID()) {
+                stream << ' ' << d._data.constData();
+            }
+
+            if (!d.isPostfixEncoded()) {
+                stream << ' ' << d._postfix.constData();
+            }
+            stream << '\n';
+        }
+        else {
+            // restoreStreamNew() reads this field back as a single whitespace-delimited token
+            // and base64-decodes it for hashed or binary entries, so those are written
+            // base64-encoded here. The record is terminated with a newline so that the next
+            // record's ID is not read as part of this data.
+            // NOTE(review): plain text that contains spaces or newlines still cannot be read
+            // back as one token -- confirm how such strings are meant to be stored.
+            stream << ' ';
+            if (d.isHashed() || d.isBinary()) {
+                stream << d._data.toBase64().constData();
+            }
+            else {
+                stream << d._data.constData();
+            }
+            stream << '\n';
+        }
+    }
+}
+
+// Restore the table from a separate document file.
+//
+// The first token selects the format: "StringTableStart" introduces the current format
+// ("<version> <count>" follow, then the records read by restoreStreamNew()); anything else is
+// taken to be the record count of the legacy format read by restoreStream().
+void StringHasher::RestoreDocFile(Base::Reader& reader)
+{
+    std::string marker;
+    std::string ver;
+    reader >> marker;
+    std::size_t count = 0;
+    // Go through clear() rather than emptying the table directly, so existing entries are
+    // detached from this hasher and the reference the hasher holds on each is released.
+    clear();
+    if (marker == "StringTableStart") {
+        reader >> ver >> count;
+        if (ver != "v1") {
+            FC_WARN("Unknown string table format");
+        }
+        restoreStreamNew(reader, count);
+        return;
+    }
+    // A negative number must not wrap around to a huge unsigned count
+    const int legacyCount = atoi(marker.c_str());
+    count = legacyCount > 0 ? static_cast<std::size_t>(legacyCount) : 0;
+    restoreStream(reader, count);
+}
+
+// Read `count` records in the format written by saveStream() and rebuild the table from them.
+//
+// Each record starts with a '.'-separated token "<id>.<flags>[.<related id>...]" (hexadecimal).
+// A leading '-' on the ID marks a relative record: the ID is an offset from the previous ID and
+// the related IDs are offsets from the previous record's related IDs (or from the record's own
+// ID once the previous record has no more related IDs). Depending on the flags, the data and
+// postfix are either taken from the related entries or read as further tokens.
+//
+// Throws Base::RuntimeError when a record is missing or malformed, or when it refers to an ID
+// that has not been restored yet.
+void StringHasher::restoreStreamNew(std::istream& stream, std::size_t count)
+{
+    // Go through clear() rather than emptying the table directly, so existing entries are
+    // detached from this hasher and the reference the hasher holds on each is released.
+    clear();
+    std::string content;
+    boost::io::ios_flags_saver ifs(stream);
+    stream >> std::hex;
+    std::vector<std::string> tokens;
+    long lastid = 0;
+    const StringID* last = nullptr;
+
+    std::string tmp;
+
+    for (uint32_t i = 0; i < count; ++i) {
+        if (!(stream >> tmp)) {
+            FC_THROWM(Base::RuntimeError, "Invalid string table");
+        }
+
+        tokens.clear();
+        boost::split(tokens, tmp, boost::is_any_of("."));
+        if (tokens.size() < 2) {
+            FC_THROWM(Base::RuntimeError, "Invalid string table");
+        }
+
+        long id = 0;
+        bool relative = false;
+        if (tokens[0][0] == '-') {
+            relative = true;
+            id = lastid + strtol(tokens[0].c_str() + 1, nullptr, 16);
+        }
+        else {
+            id = strtol(tokens[0].c_str(), nullptr, 16);
+        }
+
+        lastid = id;
+
+        unsigned long flag = strtol(tokens[1].c_str(), nullptr, 16);
+        StringIDRef sid(new StringID(id, QByteArray(), static_cast<StringID::Flag>(flag)));
+
+        StringID& d = *sid._sid;
+        d._sids.reserve(tokens.size() - 2);
+
+        // Tokens from index 2 onwards are the related IDs
+        int j = 2;
+        if (relative && last) {
+            // Offsets relative to the previous record's related IDs
+            for (; j < (int)tokens.size() && j - 2 < last->_sids.size(); ++j) {
+                long m = last->_sids[j - 2].value();
+                long n;
+                if (tokens[j][0] == '-') {
+                    n = -strtol(&tokens[j][1], nullptr, 16);
+                }
+                else {
+                    n = strtol(&tokens[j][0], nullptr, 16);
+                }
+                StringIDRef related = getID(m + n);
+                if (!related) {
+                    FC_THROWM(Base::RuntimeError, "Invalid string id reference");
+                }
+                d._sids.push_back(related);
+            }
+        }
+        // Remaining related IDs: offsets from this record's ID when relative, absolute otherwise
+        for (; j < (int)tokens.size(); ++j) {
+            long n = strtol(tokens[j].data(), nullptr, 16);
+            StringIDRef related = getID(relative ? id - n : n);
+            if (!related) {
+                FC_THROWM(Base::RuntimeError, "Invalid string id reference");
+            }
+            d._sids.push_back(related);
+        }
+
+        if (!d.isPostfixed()) {
+            stream >> content;
+            if (d.isHashed() || d.isBinary()) {
+                d._data = QByteArray::fromBase64(content.c_str());
+            }
+            else {
+                d._data = content.c_str();
+            }
+        }
+        else {
+            int offset = 0;
+            if (d.isPostfixEncoded()) {
+                // The postfix is the data of the first related entry
+                offset = 1;
+                if (d._sids.empty()) {
+                    FC_THROWM(Base::RuntimeError, "Missing string postfix");
+                }
+                d._postfix = d._sids[0]._sid->_data;
+            }
+            if (d.isIndexed()) {
+                // The data is the data of the related entry following the postfix reference
+                if (d._sids.size() <= offset) {
+                    FC_THROWM(Base::RuntimeError, "Missing string prefix");
+                }
+                d._data = d._sids[offset]._sid->_data;
+            }
+            else if (d.isPrefixID() || d.isPrefixIDIndex()) {
+                // The data is the "#id" text of the related entry, plus ':' when indexed
+                if (d._sids.size() <= offset) {
+                    FC_THROWM(Base::RuntimeError, "Missing string prefix id");
+                }
+                d._data = d._sids[offset]._sid->toString(0).c_str();
+                if (d.isPrefixIDIndex())
+                    d._data += ":";
+            }
+            else {
+                stream >> content;
+                d._data = content.c_str();
+            }
+            if (!d.isPostfixEncoded()) {
+                stream >> content;
+                d._postfix = content.c_str();
+            }
+        }
+
+        last = insert(sid);
+    }
+}
+
+// Add a StringID that does not yet belong to any hasher to the table.
+//
+// The StringID is attached to this hasher and one reference is taken on it before the
+// insertion is attempted at the end of the ID-ordered view. If the table ends up holding a
+// different StringID at the returned position (the insertion was refused), the attachment and
+// the reference are undone again.
+//
+// Returns the StringID the table holds at the returned position.
+StringID* StringHasher::insert(const StringIDRef& sid)
+{
+    assert(sid && sid._sid->_hasher == nullptr);
+    StringID& candidate = *sid._sid;
+    candidate._hasher = this;
+    candidate.ref();
+
+    auto& idView = _hashes->right;
+    auto where =
+        idView.insert(idView.end(), HashMap::right_map::value_type(sid.value(), &candidate));
+
+    StringID* stored = where->second;
+    if (stored != &candidate) {
+        candidate._hasher = nullptr;
+        candidate.unref();
+    }
+    return stored;
+}
+
+// Read `count` records of the legacy text format, "<id> <type> <content>" each, and rebuild the
+// table from them. Content of hashed or binary entries is base64-decoded.
+//
+// Throws Base::RuntimeError when a record cannot be read.
+void StringHasher::restoreStream(std::istream& stream, std::size_t count)
+{
+    // Go through clear() rather than emptying the table directly, so existing entries are
+    // detached from this hasher and the reference the hasher holds on each is released.
+    clear();
+    std::string content;
+    for (uint32_t i = 0; i < count; ++i) {
+        int32_t id = 0;
+        // NOTE(review): extracting into a uint8_t reads a single character, not a number --
+        // confirm this matches how the legacy writer stored the type field.
+        uint8_t type = 0;
+        if (!(stream >> id >> type >> content)) {
+            // Same handling as restoreStreamNew(): stop instead of inserting empty entries
+            FC_THROWM(Base::RuntimeError, "Invalid string table");
+        }
+        StringIDRef sid = new StringID(id, QByteArray(), static_cast<StringID::Flag>(type));
+        if (sid.isHashed() || sid.isBinary()) {
+            sid._sid->_data = QByteArray::fromBase64(content.c_str());
+        }
+        else {
+            sid._sid->_data = QByteArray(content.c_str());
+        }
+        insert(sid);
+    }
+}
+
+// Empty the table. Every StringID in it is first detached from this hasher and the reference
+// taken on it in insert() is released.
+void StringHasher::clear()
+{
+    for (auto& entry : _hashes->right) {
+        StringID* stringID = entry.second;
+        stringID->_hasher = nullptr;
+        stringID->unref();
+    }
+    _hashes->clear();
+}
+
+// Return the total number of entries in the table.
+size_t StringHasher::size() const
+{
+    return _hashes->size();
+}
+
+// Return the number of entries whose reference count is greater than one, i.e. entries that
+// are referenced by something besides the reference the hasher takes in insert().
+size_t StringHasher::count() const
+{
+    size_t referenced = 0;
+    for (auto& entry : _hashes->right) {
+        const bool inUse = entry.second->getRefCount() > 1;
+        if (inUse) {
+            ++referenced;
+        }
+    }
+    return referenced;
+}
+
+// Restore the hasher from the document XML written by Save().
+//
+// Reads the saveall and threshold settings from the <StringHasher> element, then restores the
+// table from one of these sources:
+//   - a separate file named by a "file" attribute (registered with the reader; the content
+//     arrives later through RestoreDocFile()),
+//   - the character data of a <StringHasher2> element (current format),
+//   - the character data of <StringHasher> (legacy format, file version > 1),
+//   - a list of <Item> child elements (oldest format).
+void StringHasher::Restore(Base::XMLReader& reader)
+{
+    clear();
+    reader.readElement("StringHasher");
+    _hashes->SaveAll = reader.getAttributeAsInteger("saveall") != 0L;
+    _hashes->Threshold = static_cast<int>(reader.getAttributeAsInteger("threshold"));
+
+    // new="1" means the table follows in a separate <StringHasher2> element; from here on the
+    // attributes queried are those of that element
+    bool newTag = false;
+    if (reader.hasAttribute("new") && reader.getAttributeAsInteger("new") > 0) {
+        reader.readElement("StringHasher2");
+        newTag = true;
+    }
+
+    if (reader.hasAttribute("file")) {
+        const char* file = reader.getAttribute("file");
+        if (*file != '\0') {
+            reader.addFile(file, this);
+        }
+        return;
+    }
+
+    std::size_t count = reader.getAttributeAsUnsigned("count");
+    if (newTag) {
+        restoreStreamNew(reader.beginCharStream(), count);
+        reader.readEndElement("StringHasher2");
+        return;
+    }
+    if ((count != 0U) && reader.FileVersion > 1) {
+        restoreStream(reader.beginCharStream(), count);
+    }
+    else {
+        // Oldest format: one <Item> element per entry, with the content in a "hash", "data"
+        // or "text" attribute
+        for (std::size_t i = 0; i < count; ++i) {
+            reader.readElement("Item");
+            StringIDRef sid;
+            long id = reader.getAttributeAsInteger("id");
+            bool hashed = reader.hasAttribute("hash");
+            if (hashed || reader.hasAttribute("data")) {
+                const char* value =
+                    hashed ? reader.getAttribute("hash") : reader.getAttribute("data");
+                // NOTE(review): items carrying a "data" attribute are flagged Hashed as well --
+                // confirm whether Binary was intended for them.
+                sid = new StringID(id, QByteArray::fromBase64(value), StringID::Flag::Hashed);
+            }
+            else {
+                sid = new StringID(id, QByteArray(reader.getAttribute("text")));
+            }
+            insert(sid);
+        }
+    }
+    reader.readEndElement("StringHasher");
+}
+
+// Return a size figure of 10 per entry, counting all entries with SaveAll and only the
+// entries reported by count() otherwise.
+unsigned int StringHasher::getMemSize() const
+{
+    const size_t entries = _hashes->SaveAll ? size() : count();
+    return entries * 10;
+}
+
+// Create a new Python wrapper object for this hasher.
+PyObject* StringHasher::getPyObject()
+{
+    auto* wrapper = new StringHasherPy(this);
+    return wrapper;
+}
+
+// Return a copy of the table as an ordered map from ID to StringID reference. The table is
+// walked in ascending ID order, so each element is inserted with an end-of-map hint.
+std::map<long, StringIDRef> StringHasher::getIDMap() const
+{
+    std::map<long, StringIDRef> idMap;
+    for (auto& entry : _hashes->right) {
+        idMap.emplace_hint(idMap.end(), entry.first, StringIDRef(entry.second));
+    }
+    return idMap;
+}
+
+// Remove the Marked flag from every entry in the table.
+void StringHasher::clearMarks() const
+{
+    for (auto& entry : _hashes->right) {
+        StringID* stringID = entry.second;
+        stringID->_flags.setFlag(StringID::Flag::Marked, false);
+    }
+}
--- a/src/App/StringHasher.h
+++ b/src/App/StringHasher.h
@@ -0,0 +1,830 @@
+// SPDX-License-Identifier: LGPL-2.1-or-later
+
+/***************************************************************************************************
+ *                                                                                                 *
+ *   Copyright (c) 2022 Zheng, Lei (realthunder) <realthunder.dev@gmail.com>                       *
+ *   Copyright (c) 2023 FreeCAD Project Association                                                *
+ *                                                                                                 *
+ *   This file is part of FreeCAD.                                                                 *
+ *                                                                                                 *
+ *   FreeCAD is free software: you can redistribute it and/or modify it under the terms of the     *
+ *   GNU Lesser General Public License as published by the Free Software Foundation, either        *
+ *   version 2.1 of the License, or (at your option) any later version.                            *
+ *                                                                                                 *
+ *   FreeCAD is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;          *
+ *   without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.     *
+ *   See the GNU Lesser General Public License for more details.                                   *
+ *                                                                                                 *
+ *   You should have received a copy of the GNU Lesser General Public License along with           *
+ *   FreeCAD. If not, see <https://www.gnu.org/licenses/>.                                         *
+ *                                                                                                 *
+ **************************************************************************************************/
+
+#ifndef APP_STRING_ID_H
+#define APP_STRING_ID_H
+
+#include <bitset>
+#include <memory>
+
+#include <QByteArray>
+#include <QVector>
+
+#include <Base/Bitmask.h>
+#include <Base/Handle.h>
+#include <Base/Persistence.h>
+#include <CXX/Objects.hxx>
+#include <utility>
+
+#include <Base/PyObjectBase.h>
+
+
+namespace Data
+{
+class MappedName;
+}
+
+namespace App
+{
+
+class StringHasher;
+class StringID;
+class StringIDRef;
+using StringHasherRef = Base::Reference<StringHasher>;
+
+/** A single entry of a string table
+ *
+ * StringID exists mainly to store mapped geometry element names (i.e. the new
+ * Topological Naming) efficiently, although it can equally serve as the entry
+ * type of a general purpose string table.
+ *
+ * A StringID is kept in a string table (StringHasher) and is referred to by an
+ * integer ID. Its content may optionally be split into a prefix and a postfix,
+ * because a new mapped name is frequently built by appending a common postfix
+ * to an existing name. Data sharing is improved as follows:
+ *
+ *      a) the main data part is reference counted (through QByteArray),
+ *
+ *      b) the prefix and/or postfix can be (recursively) encoded as an integer
+ *         (in the form #<hex>, e.g. #1b) that refers to another StringID,
+ *
+ *      c) index based names in the prefix, e.g. Edge1, Vertex2, are detected
+ *         and only the text part is encoded as StringID. The index is kept
+ *         separately in the reference class StringIDRef to maximize sharing.
+ */
+class AppExport StringID: public Base::BaseClass, public Base::Handled
+{
+    TYPESYSTEM_HEADER_WITH_OVERRIDE();// NOLINT
+
+public:
+    /// Bit flags describing the stored string data
+    enum class Flag
+    {
+        /// No flag is set
+        None = 0,
+        /// The stored data is binary
+        Binary = 0x001,
+        /// The stored data is the sha1 hash of the original content
+        Hashed = 0x002,
+        /** The postfix is encoded as #<hex>, e.g. #1b, where the hex integer
+         * refers to another StringID.
+         */
+        PostfixEncoded = 0x004,
+        /// The data is split into prefix and postfix
+        Postfixed = 0x008,
+        /// The prefix data is split into text + index
+        Indexed = 0x010,
+        /** The prefix data is encoded as #<hex>, e.g. #1b, where the hex
+         * integer refers to another StringID.
+         */
+        PrefixID = 0x020,
+        /** The prefix is split into text + index, and the text is encoded
+         * using another StringID.
+         */
+        PrefixIDIndex = 0x040,
+        /// The string ID is persistent regardless of the internal mark
+        Persistent = 0x080,
+        /// Internal mark used to check whether the string ID is in use
+        Marked = 0x100,
+    };
+    using Flags = Base::Flags<Flag>;
+
+    /** Constructor
+     * @param id: integer ID assigned to this StringID
+     * @param data: the data to store
+     * @param flags: flags describing the data
+     *
+     * User code is not expected to construct a StringID directly; obtain one
+     * through StringHasher::getID() instead.
+     */
+    StringID(long id, QByteArray data, const Flags& flags = Flag::None)
+        : _id(id),
+          _data(std::move(data)),
+          _flags(flags)
+    {}
+
+    /// Constructs an empty StringID (ID 0, no flags)
+    StringID()
+        : _id(0),
+          _flags(Flag::None)
+    {}
+
+    // A StringID can neither be copied nor moved.
+    StringID(const StringID& other) = delete;
+    StringID(StringID&& other) noexcept = delete;
+    StringID& operator=(const StringID& rhs) = delete;
+    StringID& operator=(StringID&& rhs) noexcept = delete;
+
+    ~StringID() override;
+
+    /// Returns the integer ID of this StringID
+    long value() const
+    {
+        return _id;
+    }
+
+    /// Returns the other StringIDs that are used to encode this one
+    const QVector<StringIDRef>& relatedIDs() const
+    {
+        return _sids;
+    }
+
+    /// @name Flag accessors
+    //@{
+    bool isBinary() const;
+    bool isHashed() const;
+    bool isPostfixed() const;
+    bool isPostfixEncoded() const;
+    bool isIndexed() const;
+    bool isPrefixID() const;
+    bool isPrefixIDIndex() const;
+    bool isMarked() const;
+    bool isPersistent() const;
+    //@}
+
+    /// Tells whether this StringID is owned by the given hasher
+    bool isFromSameHasher(const StringHasherRef& hasher) const
+    {
+        return _hasher == hasher;
+    }
+
+    /// Returns a counted reference to the owning hasher
+    StringHasherRef getHasher() const
+    {
+        return StringHasherRef(_hasher);
+    }
+
+    /// Returns the data (the prefix part)
+    QByteArray data() const
+    {
+        return _data;
+    }
+
+    /// Returns the postfix part
+    QByteArray postfix() const
+    {
+        return _postfix;
+    }
+
+    /// Replaces the postfix part
+    void setPostfix(QByteArray postfix)
+    {
+        _postfix = std::move(postfix);
+    }
+
+    PyObject* getPyObject() override;
+    /// Returns a Python tuple holding both the text and the index
+    PyObject* getPyObjectWithIndex(int index);
+
+    /** Produce the string representation of this StringID
+     * @param index: optional index
+     *
+     * The format is #<id>, or #<id>:<index> when index is non zero. Both
+     * <id> and <index> are in hex format.
+     */
+    std::string toString(int index = 0) const;
+
+    /// Light weight pair of a string ID and its associated index
+    struct IndexID
+    {
+        long id;
+        int index;
+
+        /// True when the ID is positive
+        explicit operator bool() const
+        {
+            return id > 0;
+        }
+
+        /// Writes the ID, followed by ':' and the index if the index is non zero
+        friend std::ostream& operator<<(std::ostream& stream, const IndexID& indexID)
+        {
+            stream << indexID.id;
+            if (indexID.index == 0) {
+                return stream;
+            }
+            stream << ':' << indexID.index;
+            return stream;
+        }
+    };
+
+    /** Parse a string to obtain ID and index
+     * @param name: input string
+     * @param eof: whether to check the end of the string. If true, the input
+     *             must consist of nothing but the string representation of
+     *             a StringID
+     * @param size: size of the input, or -1 if the input is zero terminated.
+     * @return Return the integer ID and index.
+     *
+     * The input is expected in the format #<id>, or #<id>:<index> when an
+     * index is present, with both id and index given as hex digits.
+     */
+    static IndexID fromString(const char* name, bool eof = true, int size = -1);
+
+    /** Parse a byte array to obtain ID and index
+     * @param bytes: input data
+     * @param eof: whether to check the end of the string. If true, the input
+     *             must consist of nothing but the string representation of
+     *             a StringID
+     *
+     * The input is expected in the format #<id>, or #<id>:<index> when an
+     * index is present, with both id and index given as hex digits.
+     */
+    static IndexID fromString(const QByteArray& bytes, bool eof = true)
+    {
+        const char* text = bytes.constData();
+        return fromString(text, eof, bytes.size());
+    }
+
+    /** Get the text content of this StringID
+     * @param index: optional index
+     * @return Return the text content of this StringID. Binary data is
+     *         returned as a base64 encoded string.
+     */
+    std::string dataToText(int index = 0) const;
+
+    /** Get the content of this StringID as QByteArray
+     * @param index: optional index.
+     *
+     * The result is the data, followed by the decimal index if it is non
+     * zero, followed by the postfix if there is one.
+     */
+    QByteArray dataToBytes(int index = 0) const
+    {
+        QByteArray bytes(_data);
+        if (index != 0) {
+            bytes.append(QByteArray::number(index));
+        }
+        if (!_postfix.isEmpty()) {
+            bytes.append(_postfix);
+        }
+        return bytes;
+    }
+
+    /// Mark this StringID as used
+    void mark() const;
+
+    /// Make this StringID persistent (or not) regardless of its usage mark
+    void setPersistent(bool enable);
+
+    /// Ordering based on compare()
+    bool operator<(const StringID& other) const
+    {
+        const int order = compare(other);
+        return order < 0;
+    }
+
+    /** Compare with another StringID
+     * @param other: the StringID to compare with
+     * @return Returns -1 if less than the other StringID, 1 if greater, or 0 if equal
+     *
+     * The owning hasher pointer is compared first, then the integer ID.
+     */
+    int compare(const StringID& other) const
+    {
+        if (_hasher != other._hasher) {
+            return _hasher < other._hasher ? -1 : 1;
+        }
+        if (_id != other._id) {
+            return _id < other._id ? -1 : 1;
+        }
+        return 0;
+    }
+
+    friend class StringHasher;
+
+private:
+    long _id;
+    QByteArray _data;
+    QByteArray _postfix;
+    StringHasher* _hasher = nullptr;
+    mutable Flags _flags;
+    mutable QVector<StringIDRef> _sids;
+};
+
+//////////////////////////////////////////////////////////////////////////
+
+/** Counted reference to a StringID instance
+ *
+ * Every non-empty StringIDRef holds one reference (ref()/unref()) on the StringID it points to.
+ * In addition to the pointer it carries an integer index, which is stored in the reference
+ * itself and not in the StringID.
+ */
+class StringIDRef
+{
+public:
+    /// Default construction results in an empty StringIDRef object: it will evaluate to boolean
+    /// "false" if queried.
+    StringIDRef()
+        : _sid(nullptr),
+          _index(0)
+    {}
+
+    /// Standard construction from a heap-allocated StringID. This reference-counting class manages
+    /// the lifetime of the StringID, ensuring it is deallocated when its reference count goes to
+    /// zero.
+    /// \param stringID A pointer to a StringID allocated with "new"
+    /// \param index (optional) An index value to store along with the StringID. Defaults to zero.
+    StringIDRef(StringID* stringID, int index = 0)
+        : _sid(stringID),
+          _index(index)
+    {
+        if (_sid) {
+            _sid->ref();
+        }
+    }
+
+    /// Copy construction results in an incremented reference count for the stored StringID
+    StringIDRef(const StringIDRef& other)
+        : _sid(other._sid),
+          _index(other._index)
+    {
+        if (_sid) {
+            _sid->ref();
+        }
+    }
+
+    /// Move construction does NOT increase the reference count of the StringID (instead, it
+    /// invalidates the pointer in the moved object).
+    StringIDRef(StringIDRef&& other) noexcept
+        : _sid(other._sid),
+          _index(other._index)
+    {
+        other._sid = nullptr;
+    }
+
+    /// Copies the StringID of another reference (incrementing its reference count) but stores
+    /// the given index instead of the other reference's index.
+    StringIDRef(const StringIDRef& other, int index)
+        : _sid(other._sid),
+          _index(index)
+    {
+        if (_sid) {
+            _sid->ref();
+        }
+    }
+
+    /// Releases the reference held on the StringID, if any.
+    ~StringIDRef()
+    {
+        if (_sid) {
+            _sid->unref();
+        }
+    }
+
+    /// Makes this reference a copy of \c stringID. With no argument the reference becomes empty.
+    void reset(const StringIDRef& stringID = StringIDRef())
+    {
+        *this = stringID;
+    }
+
+    /// Makes this reference refer to the StringID of \c stringID, paired with the given index.
+    void reset(const StringIDRef& stringID, int index)
+    {
+        *this = stringID;
+        this->_index = index;
+    }
+
+    /// Exchanges the contents of the two references. Nothing is done if they already compare
+    /// equal.
+    void swap(StringIDRef& stringID)
+    {
+        if (*this != stringID) {
+            auto tmp = stringID;
+            stringID = *this;
+            *this = tmp;
+        }
+    }
+
+    /// Assigns a raw StringID pointer. The reference count of the previously held StringID is
+    /// decremented and that of the new one incremented. The stored index is always reset to zero.
+    StringIDRef& operator=(StringID* stringID)
+    {
+        if (_sid != stringID) {
+            if (_sid) {
+                _sid->unref();
+            }
+            _sid = stringID;
+            if (_sid) {
+                _sid->ref();
+            }
+        }
+        // Reset the index even when the very same StringID is re-assigned: the result of
+        // assigning a raw pointer must not depend on what this reference held before.
+        this->_index = 0;
+        return *this;
+    }
+
+    /// Copy assignment: takes over both the StringID (adjusting reference counts as needed) and
+    /// the index of \c stringID.
+    StringIDRef& operator=(const StringIDRef& stringID)
+    {
+        if (&stringID == this) {
+            return *this;
+        }
+        if (_sid != stringID._sid) {
+            if (_sid) {
+                _sid->unref();
+            }
+            _sid = stringID._sid;
+            if (_sid) {
+                _sid->ref();
+            }
+        }
+        this->_index = stringID._index;
+        return *this;
+    }
+
+    /// Move assignment: steals the pointer of \c stringID without touching its reference count.
+    /// If both references already point to the same StringID only the index is copied and the
+    /// moved-from reference keeps its pointer (and its own reference).
+    StringIDRef& operator=(StringIDRef&& stringID) noexcept
+    {
+        if (_sid != stringID._sid) {
+            if (_sid) {
+                _sid->unref();
+            }
+            _sid = stringID._sid;
+            stringID._sid = nullptr;
+        }
+        this->_index = stringID._index;
+        return *this;
+    }
+
+    /// Ordering: an empty reference sorts before any non-empty one. Non-empty references are
+    /// ordered by StringID::compare() and, if that yields equality, by index.
+    bool operator<(const StringIDRef& stringID) const
+    {
+        if (!stringID._sid) {
+            return false;
+        }
+        if (!_sid) {
+            return true;
+        }
+        int res = _sid->compare(*stringID._sid);
+        if (res < 0) {
+            return true;
+        }
+        if (res > 0) {
+            return false;
+        }
+        return _index < stringID._index;
+    }
+
+    /// Two non-empty references are equal if their StringIDs compare equal and their indices
+    /// match. If at least one is empty, they are equal only if both are empty (the index is
+    /// not considered in that case).
+    bool operator==(const StringIDRef& stringID) const
+    {
+        if (_sid && stringID._sid) {
+            return _sid->compare(*stringID._sid) == 0 && _index == stringID._index;
+        }
+        return _sid == stringID._sid;
+    }
+
+    bool operator!=(const StringIDRef& stringID) const
+    {
+        return !(*this == stringID);
+    }
+
+    /// True if this reference points to a StringID.
+    explicit operator bool() const
+    {
+        return _sid != nullptr;
+    }
+
+    /// Returns the reference count of the StringID, or 0 for an empty reference.
+    int getRefCount() const
+    {
+        if (_sid) {
+            return _sid->getRefCount();
+        }
+        return 0;
+    }
+
+    /// Returns StringID::toString() for the stored index, or an empty string for an empty
+    /// reference.
+    std::string toString() const
+    {
+        if (_sid) {
+            return _sid->toString(_index);
+        }
+        return {};
+    }
+
+    /// Returns StringID::dataToText() for the stored index, or an empty string for an empty
+    /// reference.
+    std::string dataToText() const
+    {
+        if (_sid) {
+            return _sid->dataToText(_index);
+        }
+        return {};
+    }
+
+    /// Get a reference to the data: only makes sense if index and postfix are both empty, but
+    /// calling code is responsible for ensuring that.
+    const char* constData() const
+    {
+        if (_sid) {
+            assert(_index == 0);
+            assert(_sid->postfix().isEmpty());
+            // NOTE(review): data() returns a QByteArray by value, so the pointer is taken from
+            // a temporary. Presumably it stays valid because the buffer is shared with the
+            // StringID's own copy - confirm against QByteArray's implicit sharing rules.
+            return _sid->data().constData();
+        }
+        return "";
+    }
+
+    /// Returns the referenced StringID. Must not be called on an empty reference: the pointer
+    /// is dereferenced without any check.
+    const StringID& deref() const
+    {
+        return *_sid;
+    }
+
+    /// Returns the integer ID of the StringID, or 0 for an empty reference.
+    long value() const
+    {
+        if (_sid) {
+            return _sid->value();
+        }
+        return 0;
+    }
+
+    /// Returns a copy of the related IDs of the StringID, or an empty vector for an empty
+    /// reference.
+    QVector<StringIDRef> relatedIDs() const
+    {
+        if (_sid) {
+            return _sid->relatedIDs();
+        }
+        return {};
+    }
+
+    /// Forwards to StringID::isBinary(); false for an empty reference.
+    bool isBinary() const
+    {
+        if (_sid) {
+            return _sid->isBinary();
+        }
+        return false;
+    }
+
+    /// Forwards to StringID::isHashed(); false for an empty reference.
+    bool isHashed() const
+    {
+        if (_sid) {
+            return _sid->isHashed();
+        }
+        return false;
+    }
+
+    /// Stores StringID::dataToBytes() for the stored index into \c bytes. The output is left
+    /// untouched if this reference is empty.
+    void toBytes(QByteArray& bytes) const
+    {
+        if (_sid) {
+            bytes = _sid->dataToBytes(_index);
+        }
+    }
+
+    /// Returns StringID::getPyObjectWithIndex() for the stored index, or a new reference to
+    /// Python's None for an empty reference.
+    PyObject* getPyObject()
+    {
+        if (_sid) {
+            return _sid->getPyObjectWithIndex(_index);
+        }
+        Py_INCREF(Py_None);
+        return Py_None;
+    }
+
+    /// Marks the StringID as used; does nothing for an empty reference.
+    void mark() const
+    {
+        if (_sid) {
+            _sid->mark();
+        }
+    }
+
+    /// Forwards to StringID::isMarked(); false for an empty reference.
+    bool isMarked() const
+    {
+        return _sid && _sid->isMarked();// NOLINT
+    }
+
+    /// Forwards to StringID::isFromSameHasher(); false for an empty reference.
+    bool isFromSameHasher(const StringHasherRef& hasher) const
+    {
+        return _sid && _sid->isFromSameHasher(hasher);// NOLINT
+    }
+
+    /// Returns the hasher owning the StringID, or a default-constructed StringHasherRef for an
+    /// empty reference.
+    StringHasherRef getHasher() const
+    {
+        if (_sid) {
+            return _sid->getHasher();
+        }
+        return {};
+    }
+
+    /// Forwards to StringID::setPersistent(); does nothing for an empty reference.
+    void setPersistent(bool enable)
+    {
+        if (_sid) {
+            _sid->setPersistent(enable);
+        }
+    }
+
+    /// Used predominantly by the unit test code to verify that index is set correctly. In general
+    /// user code should not need to call this function.
+    int getIndex() const
+    {
+        return _index;
+    }
+
+    friend class StringHasher;
+
+private:
+    StringID* _sid;
+    int _index;
+};
+
+
+/// \brief A bidirectional map of strings and their integer identifier.
+///
+/// Maps an arbitrary text string to a unique integer ID, maintaining a reference-counted shared
+/// pointer for each. This permits elimination of unused strings based on their reference
+/// count. If a duplicate string is added, no additional copy is made, and a new reference to the
+/// original storage is returned (incrementing the reference counter of the instance).
+///
+/// If the string is longer than a given threshold, instead of storing the string, its SHA1 hash is
+/// stored (and the original string discarded). This allows an upper threshold on the length of a
+/// stored string, while still effectively guaranteeing uniqueness in the table.
+class AppExport StringHasher: public Base::Persistence, public Base::Handled
+{
+
+    TYPESYSTEM_HEADER_WITH_OVERRIDE();// NOLINT
+
+public:
+    StringHasher();
+    ~StringHasher() override;
+
+    // A StringHasher can neither be copied nor moved.
+    StringHasher(const StringHasher&) = delete;
+    StringHasher(StringHasher&&) noexcept = delete;
+    StringHasher& operator=(const StringHasher& other) = delete;
+    StringHasher& operator=(StringHasher&& other) noexcept = delete;
+
+    /// @name Base::Persistence interface
+    //@{
+    unsigned int getMemSize() const override;
+    void Save(Base::Writer& /*writer*/) const override;
+    void Restore(Base::XMLReader& /*reader*/) override;
+    void SaveDocFile(Base::Writer& /*writer*/) const override;
+    void RestoreDocFile(Base::Reader& /*reader*/) override;
+    //@}
+    void setPersistenceFileName(const char* name) const;
+    const std::string& getPersistenceFileName() const;
+
+    /** Maps an arbitrary string to an integer
+     *
+     * @param text: input string.
+     * @param len: length of the string: optional if the string is null-terminated.
+     * @param hashable: whether hashing the string is permitted.
+     * @return A shared pointer to the internally-stored StringID.
+     *
+     * Maps an arbitrary text string to a unique integer ID, returning a reference-counted shared
+     * pointer to the StringID. This permits elimination of unused strings based on their reference
+     * count. If a duplicate string is added, no additional copy is made, and a new reference to the
+     * original storage is returned (incrementing the reference counter of the instance).
+     *
+     * If \c hashable is true and the string is longer than the threshold setting of this
+     * StringHasher, only the SHA1 hash of the string is stored: the original content of the string
+     * is discarded. If \c hashable is false, the string is copied and stored inside a StringID
+     * instance.
+     *
+     * The purpose of this function is to provide a short form of a stable string identification.
+     */
+    StringIDRef getID(const char* text, int len = -1, bool hashable = false);
+
+    /// Options for storing string data
+    enum class Option
+    {
+        /// No option is set
+        None = 0,
+
+        /// The input data is binary
+        Binary = 1 << 0,
+
+        /// Hashing is permitted for this input data. If the data length is longer than the
+        /// threshold setting of the StringHasher, it will be sha1 hashed before storing, and the
+        /// original content of the string is discarded.
+        Hashable = 1 << 1,
+
+        /// Do not copy the data: assume it is constant and exists for the lifetime of this hasher.
+        /// If this option is not set, the data will be copied before storing.
+        NoCopy = 1 << 2,
+    };
+    using Options = Base::Flags<Option>;
+
+    /** Map text or binary data to an integer
+     *
+     * @param data: input data.
+     * @param options: options describing how to store the data.
+     * @return A shared pointer to the internally stored StringID.
+     *
+     * Pass an Option (or a combination of options) as second argument. A plain \c bool does not
+     * convert to Options and would select a different overload.
+     *
+     * \sa getID (const char*, int, bool);
+     */
+    StringIDRef getID(const QByteArray& data, Options options = Option::Hashable);
+
+    /** Map geometry element name to an integer */
+    StringIDRef getID(const Data::MappedName& name, const QVector<StringIDRef>& sids);
+
+    /** Obtain the reference counted StringID object from numerical id
+     *
+     * @param id: string ID
+     * @param index: optional index of the string ID
+     * @return Return a shared pointer to the internally stored StringID.
+     *
+     * This function exists because the stored string may be one way hashed,
+     * and the original text is not persistent. The caller uses this function
+     * to retrieve the reference counted ID object after restore.
+     */
+    StringIDRef getID(long id, int index = 0) const;
+
+    /** Obtain the reference counted StringID object from numerical id and index
+     *
+     * @param id: string ID with index
+     * @return Return a shared pointer to the internally stored StringID.
+     */
+    StringIDRef getID(const StringID::IndexID& id) const
+    {
+        return getID(id.id, id.index);
+    }
+
+    /// Returns a copy of the table as a map from integer ID to StringIDRef
+    std::map<long, StringIDRef> getIDMap() const;
+
+    /// Clear all string hashes
+    void clear();
+
+    /// Size of the hash table
+    size_t size() const;
+
+    /// Return the number of hashes that are used by others
+    size_t count() const;
+
+    PyObject* getPyObject() override;
+
+    /** Enable/disable saving all string ID
+     *
+     * If saveAll is true, then compact() does nothing even when called explicitly. Setting
+     * saveAll to false causes compact() to be run immediately.
+     */
+    void setSaveAll(bool enable);
+    bool getSaveAll() const;
+
+    /** Set threshold of string hashing
+     *
+     * For hashable strings that are longer than this threshold, the string will
+     * be replaced by its sha1 hash.
+     */
+    void setThreshold(int threshold);
+    int getThreshold() const;
+
+    /** Clear internal marks
+     *
+     * The internal marks on internally stored StringID instances are used to
+     * check if the StringID is used.
+     */
+    void clearMarks() const;
+
+    /// Compact string storage by eliminating unused strings from the table.
+    void compact();
+
+    class HashMap;
+    friend class StringID;
+
+protected:
+    StringID* insert(const StringIDRef& sid);
+    long lastID() const;
+    void saveStream(std::ostream& stream) const;
+    void restoreStream(std::istream& stream, std::size_t count);
+    void restoreStreamNew(std::istream& stream, std::size_t count);
+
+private:
+    std::unique_ptr<HashMap> _hashes;///< Bidirectional map of StringID and its index (a long int).
+    mutable std::string _filename;
+};
+}// namespace App
+
+ENABLE_BITMASK_OPERATORS(App::StringID::Flag)
+ENABLE_BITMASK_OPERATORS(App::StringHasher::Option)
+
+namespace App
+{
+// Out-of-class definitions of the StringID flag accessors. Each getter tests exactly one bit of
+// the flag set stored in the StringID.
+
+/// True if the Binary flag is set
+inline bool StringID::isBinary() const
+{
+    return _flags.testFlag(StringID::Flag::Binary);
+}
+
+/// True if the Hashed flag is set
+inline bool StringID::isHashed() const
+{
+    return _flags.testFlag(StringID::Flag::Hashed);
+}
+
+/// True if the Postfixed flag is set
+inline bool StringID::isPostfixed() const
+{
+    return _flags.testFlag(StringID::Flag::Postfixed);
+}
+
+/// True if the PostfixEncoded flag is set
+inline bool StringID::isPostfixEncoded() const
+{
+    return _flags.testFlag(StringID::Flag::PostfixEncoded);
+}
+
+/// True if the Indexed flag is set
+inline bool StringID::isIndexed() const
+{
+    return _flags.testFlag(StringID::Flag::Indexed);
+}
+
+/// True if the PrefixID flag is set
+inline bool StringID::isPrefixID() const
+{
+    return _flags.testFlag(StringID::Flag::PrefixID);
+}
+
+/// True if the PrefixIDIndex flag is set
+inline bool StringID::isPrefixIDIndex() const
+{
+    return _flags.testFlag(StringID::Flag::PrefixIDIndex);
+}
+
+/// True if the Marked flag is set
+inline bool StringID::isMarked() const
+{
+    return _flags.testFlag(StringID::Flag::Marked);
+}
+
+/// True if the Persistent flag is set
+inline bool StringID::isPersistent() const
+{
+    return _flags.testFlag(StringID::Flag::Persistent);
+}
+
+/// Sets or clears the Persistent flag
+inline void StringID::setPersistent(bool enable)
+{
+    _flags.setFlag(StringID::Flag::Persistent, enable);
+}
+}// namespace App
+
+#endif// APP_STRING_ID_H
--- a/src/App/StringHasherPy.xml
+++ b/src/App/StringHasherPy.xml
@@ -0,0 +1,71 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<GenerateModel xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:noNamespaceSchemaLocation="generateMetaModel_Module.xsd">
+    <!-- Python binding description for App::StringHasher (Twin). The generated class is
+         StringHasherPy, derived from Base::BaseClassPy. The methods and attribute accessors
+         declared below are implemented by hand in StringHasherPyImp.cpp. -->
+    <PythonExport 
+        Father="BaseClassPy" 
+        Name="StringHasherPy" 
+        Twin="StringHasher" 
+        TwinPointer="StringHasher" 
+        Include="App/StringHasher.h" 
+        FatherInclude="Base/BaseClassPy.h" 
+        Namespace="App" 
+        FatherNamespace="Base"
+        Constructor="true"
+        Reference="true">
+        <Documentation>
+            <Author Licence="LGPL" Name="Zheng, Lei" EMail="realthunder.dev@gmail.com" />
+            <DeveloperDocu>This is the StringHasher class</DeveloperDocu>
+            <UserDocu>This is the StringHasher class</UserDocu>
+        </Documentation>
+        <!-- Methods: implemented as StringHasherPy::getID() and StringHasherPy::isSame() -->
+        <Methode Name="getID">
+            <Documentation>
+                <UserDocu>
+getID(txt|id, base64=False) -> StringID
+
+If the input is text, return a StringID object that is unique within this hasher. This
+StringID object is reference counted. The hasher may only save hash ID's that are used.
+
+If the input is an integer, then the hasher will try to find the StringID object stored
+with the same integer value. 
+
+base64: indicate if the input 'txt' is base64 encoded binary data
+                </UserDocu>
+            </Documentation>
+        </Methode>
+        <Methode Name="isSame" Const="true">
+            <Documentation>
+                <UserDocu>Check if two hasher are the same</UserDocu>
+            </Documentation>
+        </Methode>
+        <!-- Read-only attributes: only a getter (getCount/getSize) exists in the implementation -->
+        <Attribute Name="Count" ReadOnly="true">
+            <Documentation>
+                <UserDocu>Return count of used hashes</UserDocu>
+            </Documentation>
+            <Parameter Name="Count" Type="Int" />
+        </Attribute>
+        <Attribute Name="Size" ReadOnly="true">
+            <Documentation>
+                <UserDocu>Return the size of the hashes</UserDocu>
+            </Documentation>
+            <Parameter Name="Size" Type="Int"/>
+        </Attribute>
+        <!-- Read/write attributes: backed by a getter and a setter in the implementation -->
+        <Attribute Name="SaveAll">
+            <Documentation>
+                <UserDocu>Whether to save all string hashes regardless of its use count</UserDocu>
+            </Documentation>
+            <Parameter Name="SaveAll" Type="Boolean"/>
+        </Attribute>
+        <Attribute Name="Threshold">
+            <Documentation>
+                <UserDocu>Data length exceed this threshold will be hashed before storing</UserDocu>
+            </Documentation>
+            <Parameter Name="Threshold" Type="Int"/>
+        </Attribute>
+        <!-- Read-only attribute built by StringHasherPy::getTable() from StringHasher::getIDMap() -->
+        <Attribute Name="Table" ReadOnly="true">
+            <Documentation>
+                <UserDocu>Return the entire string table as Int->String dictionary</UserDocu>
+            </Documentation>
+            <Parameter Name="Table" Type="Dict"/>
+        </Attribute>
+    </PythonExport>
+</GenerateModel>
+
--- a/src/App/StringHasherPyImp.cpp
+++ b/src/App/StringHasherPyImp.cpp
@@ -0,0 +1,148 @@
+/****************************************************************************
+*   Copyright (c) 2018 Zheng Lei (realthunder) <realthunder.dev@gmail.com> *
+*                                                                          *
+*   This file is part of the FreeCAD CAx development system.               *
+*                                                                          *
+*   This library is free software; you can redistribute it and/or          *
+*   modify it under the terms of the GNU Library General Public            *
+*   License as published by the Free Software Foundation; either           *
+*   version 2 of the License, or (at your option) any later version.       *
+*                                                                          *
+*   This library  is distributed in the hope that it will be useful,       *
+*   but WITHOUT ANY WARRANTY; without even the implied warranty of         *
+*   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the          *
+*   GNU Library General Public License for more details.                   *
+*                                                                          *
+*   You should have received a copy of the GNU Library General Public      *
+*   License along with this library; see the file COPYING.LIB. If not,     *
+*   write to the Free Software Foundation, Inc., 59 Temple Place,          *
+*   Suite 330, Boston, MA  02111-1307, USA                                 *
+*                                                                          *
+****************************************************************************/
+
+#include "PreCompiled.h"
+
+#include "StringHasher.h"
+
+#include "StringHasherPy.h"
+#include "StringHasherPy.cpp"
+
+using namespace App;
+
+// Builds the text that represents the object, e.g. when it is printed in Python. The text has
+// the form "<StringHasher at ADDRESS>", ADDRESS being the wrapped StringHasher pointer as
+// formatted by the output stream.
+std::string StringHasherPy::representation(void) const
+{
+   std::ostringstream out;
+   out << "<StringHasher at ";
+   out << getStringHasherPtr();
+   out << ">";
+   return out.str();
+}
+
+// Python wrapper: creates a fresh StringHasher and wraps it in a new StringHasherPy object.
+// None of the arguments is used.
+PyObject *StringHasherPy::PyMake(struct _typeobject *, PyObject *, PyObject *)
+{
+   auto *hasher = new StringHasher;
+   return new StringHasherPy(hasher);
+}
+
+// Constructor method: takes no arguments into account and always reports success (0).
+int StringHasherPy::PyInit(PyObject* , PyObject* )
+{
+   const int success = 0;
+   return success;
+}
+
+
+// Checks whether the given Python object wraps the very same StringHasher as this object.
+// Expects a single StringHasherPy argument. Anything else yields False.
+PyObject* StringHasherPy::isSame(PyObject *args)
+{
+   PyObject *other = nullptr;
+   if (!PyArg_ParseTuple(args, "O!", &StringHasherPy::Type, &other)){     // convert args: Python->C
+       // PyArg_ParseTuple() has set a Python exception. A result must not be returned while
+       // an exception is pending, so discard it before answering "not the same".
+       PyErr_Clear();
+       return Py::new_reference_to(Py::False());
+   }
+   auto otherHasher = static_cast<StringHasherPy*>(other)->getStringHasherPtr();
+   return Py::new_reference_to(Py::Boolean(getStringHasherPtr() == otherHasher));
+}
+
+// getID(txt|id, base64=False) -> StringID
+//
+// Two call forms are accepted:
+//   * getID(id [, index]) with an integer id: looks up the StringID stored under that id and
+//     returns it, or None if there is no such StringID.
+//   * getID(txt [, base64]) with a string: returns the StringID for the text. If base64 is
+//     true the text is base64 decoded first and the result is stored as binary data.
+// Raises TypeError if the first argument is neither an integer nor a string.
+PyObject* StringHasherPy::getID(PyObject *args)
+{
+   long id = -1;
+   int index = 0;
+   PyObject *value = nullptr;
+   PyObject *base64 = Py_False;
+   // Try the integer form first and fall back to the generic form if that fails.
+   if (!PyArg_ParseTuple(args, "l|i",&id,&index)) {
+       PyErr_Clear();
+       if (!PyArg_ParseTuple(args, "O|O",&value,&base64))
+           return nullptr;    // nullptr triggers exception
+   }
+   if(id>0) {
+       PY_TRY {
+           auto sid = getStringHasherPtr()->getID(id, index);
+           if(!sid) Py_Return;
+           return sid.getPyObject();
+       }PY_CATCH;
+   }
+   if (!value) {
+       // The integer form was used with an id that is not positive. There is no text to fall
+       // back to (value was never assigned), so report "not found" like a failed lookup does.
+       Py_Return;
+   }
+   std::string txt;
+#if PY_MAJOR_VERSION >= 3
+   if (PyUnicode_Check(value)) {
+       const char *utf8 = PyUnicode_AsUTF8(value);
+       if (!utf8)
+           return nullptr;    // conversion failed, the Python exception is already set
+       txt = utf8;
+   }
+#else
+   if (PyUnicode_Check(value)) {
+       PyObject* unicode = PyUnicode_AsLatin1String(value);
+       txt = PyString_AsString(unicode);
+       Py_DECREF(unicode);
+   }
+   else if (PyString_Check(value)) {
+       txt = PyString_AsString(value);
+   }
+#endif
+   else
+       throw Py::TypeError("expect argument of type string");
+   PY_TRY {
+       QByteArray data;
+       StringIDRef sid;
+       const int length = static_cast<int>(txt.size());
+       if(PyObject_IsTrue(base64)) {
+           data = QByteArray::fromBase64(QByteArray::fromRawData(txt.c_str(),length));
+           // Select the QByteArray overload explicitly. A plain 'true' does not convert to
+           // StringHasher::Options and would pick getID(const char*, int, bool) instead.
+           sid = getStringHasherPtr()->getID(data, StringHasher::Option::Binary);
+       }else
+           sid = getStringHasherPtr()->getID(txt.c_str(),length);
+       return sid.getPyObject();
+   }PY_CATCH;
+}
+
+// Attribute "Count": the result of StringHasher::count(), converted to long.
+Py::Int StringHasherPy::getCount(void) const {
+   const auto used = static_cast<long>(getStringHasherPtr()->count());
+   return Py::Int(used);
+}
+
+// Attribute "Size": the result of StringHasher::size(), converted to long.
+Py::Int StringHasherPy::getSize(void) const {
+   const auto total = static_cast<long>(getStringHasherPtr()->size());
+   return Py::Int(total);
+}
+
+// Attribute "SaveAll" (getter): forwards StringHasher::getSaveAll().
+Py::Boolean StringHasherPy::getSaveAll(void) const {
+   const bool saveAll = getStringHasherPtr()->getSaveAll();
+   return Py::Boolean(saveAll);
+}
+
+// Attribute "SaveAll" (setter): forwards the boolean to StringHasher::setSaveAll().
+void StringHasherPy::setSaveAll(Py::Boolean value) {
+   const bool enable = value;
+   getStringHasherPtr()->setSaveAll(enable);
+}
+
+// Attribute "Threshold" (getter): the result of StringHasher::getThreshold(), converted to long.
+Py::Int StringHasherPy::getThreshold(void) const {
+   const auto threshold = static_cast<long>(getStringHasherPtr()->getThreshold());
+   return Py::Int(threshold);
+}
+
+// Attribute "Threshold" (setter): forwards the integer to StringHasher::setThreshold().
+void StringHasherPy::setThreshold(Py::Int value) {
+   const int threshold = value;
+   getStringHasherPtr()->setThreshold(threshold);
+}
+
+// Attribute "Table": a dictionary built from StringHasher::getIDMap(), mapping each integer ID
+// to the text returned by StringIDRef::dataToText().
+Py::Dict StringHasherPy::getTable() const {
+   Py::Dict table;
+   const auto idMap = getStringHasherPtr()->getIDMap();
+   for (auto it = idMap.begin(); it != idMap.end(); ++it) {
+       table.setItem(Py::Int(it->first), Py::String(it->second.dataToText()));
+   }
+   return table;
+}
+
+// No custom attributes are provided: a null pointer is returned for every name.
+PyObject *StringHasherPy::getCustomAttributes(const char* /*attr*/) const
+{
+   return nullptr;
+}
+
+// No custom attributes can be set: 0 is returned for every name/value pair.
+int StringHasherPy::setCustomAttributes(const char* /*attr*/, PyObject* /*obj*/)
+{
+   const int result = 0;
+   return result;
+}
--- a/src/App/StringIDPy.xml
+++ b/src/App/StringIDPy.xml
@@ -0,0 +1,65 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<GenerateModel xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:noNamespaceSchemaLocation="generateMetaModel_Module.xsd">
+    <PythonExport 
+        Father="BaseClassPy" 
+        Name="StringIDPy" 
+        Twin="StringID" 
+        TwinPointer="StringID" 
+        Include="App/StringHasher.h" 
+        FatherInclude="Base/BaseClassPy.h" 
+        Namespace="App" 
+        FatherNamespace="Base"
+        Reference="true">
+        <Documentation>
+            <Author Licence="LGPL" Name="Zheng, Lei" EMail="realthunder.dev@gmail.com" />
+            <DeveloperDocu>This is the StringID class</DeveloperDocu>
+            <UserDocu>This is the StringID class</UserDocu>
+        </Documentation>
+        <Methode Name="isSame" Const="true">
+            <Documentation>
+                <UserDocu>Check if two StringIDs are the same</UserDocu>
+            </Documentation>
+        </Methode>
+        <Attribute Name="Value" ReadOnly="true">
+            <Documentation>
+                <UserDocu>Return the integer value of this ID</UserDocu>
+            </Documentation>
+            <Parameter Name="Value" Type="Int"/>
+        </Attribute>
+        <Attribute Name="Related" ReadOnly="true">
+            <Documentation>
+                <UserDocu>Return the related string IDs</UserDocu>
+            </Documentation>
+            <Parameter Name="Related" Type="List"/>
+        </Attribute>
+        <Attribute Name="Data" ReadOnly="true">
+            <Documentation>
+                <UserDocu>Return the data associated with this ID</UserDocu>
+            </Documentation>
+            <Parameter Name="Data" Type="String"/>
+        </Attribute>
+        <Attribute Name="IsBinary" ReadOnly="true">
+            <Documentation>
+                <UserDocu>Check if the data is binary</UserDocu>
+            </Documentation>
+            <Parameter Name="IsBinary" Type="Boolean"/>
+        </Attribute>
+        <Attribute Name="IsHashed" ReadOnly="true">
+            <Documentation>
+                <UserDocu>Check if the data is hashed. If so, 'Data' returns a base64-encoded string of the raw hash</UserDocu>
+            </Documentation>
+            <Parameter Name="IsHashed" Type="Boolean"/>
+        </Attribute>
+        <Attribute Name="Index" ReadOnly="false">
+            <Documentation>
+                <UserDocu>Geometry index. Only meaningful for geometry element name</UserDocu>
+            </Documentation>
+            <Parameter Name="Index" Type="Int"/>
+        </Attribute>
+		<ClassDeclarations>private:
+    friend class StringID;
+    int _index = 0;
+		</ClassDeclarations>
+    </PythonExport>
+</GenerateModel>
+
--- a/src/App/StringIDPyImp.cpp
+++ b/src/App/StringIDPyImp.cpp
@@ -0,0 +1,90 @@
+/***************************************************************************
+*   Copyright (c) 2018 Zheng Lei (realthunder) <realthunder.dev@gmail.com> *
+*                                                                          *
+*   This file is part of the FreeCAD CAx development system.               *
+*                                                                          *
+*   This library is free software; you can redistribute it and/or          *
+*   modify it under the terms of the GNU Library General Public            *
+*   License as published by the Free Software Foundation; either           *
+*   version 2 of the License, or (at your option) any later version.       *
+*                                                                          *
+*   This library  is distributed in the hope that it will be useful,       *
+*   but WITHOUT ANY WARRANTY; without even the implied warranty of         *
+*   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the          *
+*   GNU Library General Public License for more details.                   *
+*                                                                          *
+*   You should have received a copy of the GNU Library General Public      *
+*   License along with this library; see the file COPYING.LIB. If not,     *
+*   write to the Free Software Foundation, Inc., 59 Temple Place,          *
+*   Suite 330, Boston, MA  02111-1307, USA                                 *
+*                                                                          *
+****************************************************************************/
+
+#include "PreCompiled.h"
+
+#include "StringHasher.h"
+
+#include "StringIDPy.h"
+#include "StringIDPy.cpp"
+
+using namespace App;
+
+// returns a string which represent the object e.g. when printed in python
+std::string StringIDPy::representation() const
+{
+   // Delegate to StringID::toString(), passing the index stored on this wrapper.
+   auto* stringId = getStringIDPtr();
+   return stringId->toString(_index);
+}
+
+// Python method isSame(other): returns True when 'other' is a StringIDPy wrapping
+// the same StringID pointer with the same index, and False otherwise (including
+// when the argument is not a StringIDPy at all).
+PyObject* StringIDPy::isSame(PyObject *args)
+{
+   PyObject *other = nullptr;
+   if (PyArg_ParseTuple(args, "O!", &StringIDPy::Type, &other) == 0) { // convert args: Python->C
+       // PyArg_ParseTuple has set a Python exception. Since we deliberately answer
+       // False instead of raising, the pending error must be cleared; returning a
+       // non-NULL result with an exception set is an error on the Python side.
+       PyErr_Clear();
+       return Py::new_reference_to(Py::False());
+   }
+   auto *otherPy = static_cast<StringIDPy*>(other);
+   return Py::new_reference_to(Py::Boolean(
+       otherPy->getStringIDPtr() == this->getStringIDPtr()
+       && otherPy->_index == this->_index));
+}
+
+// Python attribute getter: the value() of the wrapped StringID.
+Py::Int StringIDPy::getValue() const
+{
+   auto* stringId = getStringIDPtr();
+   return Py::Int(stringId->value());
+}
+
+// Python attribute getter: a list holding value() of every entry in relatedIDs().
+Py::List StringIDPy::getRelated() const
+{
+   Py::List related;
+   auto* stringId = getStringIDPtr();
+   for (const auto& relatedId : stringId->relatedIDs()) {
+       related.append(Py::Long(relatedId.value()));
+   }
+   return related;
+}
+
+// Python attribute getter: text form of the wrapped StringID's data for this wrapper's index.
+Py::String StringIDPy::getData() const
+{
+   auto* stringId = getStringIDPtr();
+   return Py::String(stringId->dataToText(_index));
+}
+
+// Python attribute getter: forwards StringID::isBinary().
+Py::Boolean StringIDPy::getIsBinary() const
+{
+   return Py::Boolean(getStringIDPtr()->isBinary());
+}
+
+// Python attribute getter: forwards StringID::isHashed().
+Py::Boolean StringIDPy::getIsHashed() const
+{
+   return Py::Boolean(getStringIDPtr()->isHashed());
+}
+
+// Python attribute getter: the index stored on this wrapper object.
+Py::Int StringIDPy::getIndex() const
+{
+   return Py::Int(_index);
+}
+
+// Python attribute setter: stores the given index on this wrapper object.
+void StringIDPy::setIndex(Py::Int index)
+{
+   _index = index;
+}
+
+// This type exposes no custom attributes: every lookup yields a null pointer.
+PyObject *StringIDPy::getCustomAttributes(const char*) const
+{
+   return nullptr;
+}
+
+// This type accepts no custom attributes: both arguments are ignored and 0 is returned.
+int StringIDPy::setCustomAttributes(const char*, PyObject*)
+{
+   return 0;
+}
--- a/src/Base/Bitmask.h
+++ b/src/Base/Bitmask.h
@@ -123,6 +123,9 @@ public:
        using u = typename std::underlying_type<Enum>::type;
        return static_cast<u>(i) == static_cast<u>(f.i);
    }
+    /// Return the stored value cast to the enum's underlying integer type.
+    typename std::underlying_type<Enum>::type toUnderlyingType() const {
+        using u = typename std::underlying_type<Enum>::type;
+        return static_cast<u>(i);
+    }
 };
 }

--- a/src/Base/Reader.cpp
+++ b/src/Base/Reader.cpp
@@ -24,6 +24,7 @@
 #include "PreCompiled.h"

 #ifndef _PreComp_
+#include <memory>
 # include <xercesc/sax2/XMLReaderFactory.hpp>
 #endif

@@ -42,6 +43,7 @@
 #include <zipios++/zipios-config.h>
 #endif
 #include <zipios++/zipinputstream.h>
+#include <boost/iostreams/filtering_stream.hpp>


 XERCES_CPP_NAMESPACE_USE
@@ -283,6 +285,85 @@ void Base::XMLReader::readCharacters()
 {
 }

+/** boost::iostreams source hook: copy up to @a n bytes of character data into @a s.
+ *
+ *  Data is taken from Characters starting at CharacterOffset. When that buffer is
+ *  used up and the last parser event was Chars, the parameterless read() is called
+ *  (presumably to pull the next parser event) and copying continues; for any other
+ *  event the stream is marked finished by setting CharacterOffset to -1.
+ *
+ *  @return the number of bytes copied, or -1 if CharacterOffset is negative on entry.
+ */
+std::streamsize Base::XMLReader::read(char_type* s, std::streamsize n)
+{
+    if (CharacterOffset < 0) {
+        return -1;
+    }
+
+    char_type* const start = s;
+    while (true) {
+        const std::streamsize available =
+            static_cast<std::streamsize>(Characters.size()) - CharacterOffset;
+        const std::streamsize chunk = (n < available) ? n : available;
+
+        std::memcpy(s, Characters.c_str() + CharacterOffset, chunk);
+        n -= chunk;
+        s += chunk;
+        CharacterOffset += chunk;
+
+        if (n == 0) {
+            // The caller's request is fully satisfied.
+            break;
+        }
+        if (ReadType != Chars) {
+            // No further character data follows: flag the stream as ended.
+            CharacterOffset = -1;
+            break;
+        }
+        read();
+    }
+
+    return s - start;
+}
+
+// End the current character stream (if any): mark the read position as inactive
+// and destroy the stream object. Safe to call when no stream is open.
+void Base::XMLReader::endCharStream()
+{
+    CharacterOffset = -1;
+    CharStream = nullptr;
+}
+
+// Return the stream created by beginCharStream().
+// Throws Base::XMLParseException when no character stream is currently open.
+std::istream& Base::XMLReader::charStream()
+{
+    if (CharStream) {
+        return *CharStream;
+    }
+    throw Base::XMLParseException("no current character stream");
+}
+
+/** Open an input stream over the character content of the current element.
+ *
+ *  The reader itself is pushed as the source device of a boost filtering stream,
+ *  so extraction from the returned stream ends up in XMLReader::read(char_type*, n).
+ *
+ *  @return the newly created stream (also available through charStream()).
+ *  @throws Base::XMLParseException if a character stream is already open, or if
+ *          the reader is not positioned on a start or self-closing element.
+ */
+std::istream& Base::XMLReader::beginCharStream()
+{
+    if (CharStream) {
+        throw Base::XMLParseException("recursive character stream");
+    }
+
+    // TODO: An XML element can actually contain a mix of child elements and
+    // characters. So we should not actually demand 'StartElement' here. But
+    // with the current implementation of character stream, we cannot track
+    // child elements and character content at the same time.
+    if (ReadType == StartElement) {
+        CharacterOffset = 0;
+        read();
+    }
+    else if (ReadType == StartEndElement) {
+        // If we are currently at a self-closing element, just leave the offset
+        // as negative and do not read any characters. This will result in an
+        // empty input stream for the caller.
+        CharacterOffset = -1;
+    }
+    else {
+        throw Base::XMLParseException("invalid state while reading character stream");
+    }
+
+    // Build the stream with its concrete type so no dynamic_cast (and no unchecked
+    // dereference of its result) is needed, and publish it in CharStream only once
+    // it is fully set up.
+    auto filteringStream = std::make_unique<boost::iostreams::filtering_istream>();
+    filteringStream->push(boost::ref(*this));
+    CharStream = std::move(filteringStream);
+    return *CharStream;
+}
+
 void Base::XMLReader::readBinFile(const char* filename)
 {
    Base::FileInfo fi(filename);
--- a/src/Base/Reader.h
+++ b/src/Base/Reader.h
@@ -33,6 +33,8 @@
 #include <xercesc/sax2/Attributes.hpp>
 #include <xercesc/sax2/DefaultHandler.hpp>

+#include <boost/iostreams/concepts.hpp>
+
 #include "FileInfo.h"


@@ -127,6 +129,13 @@ public:
    XMLReader(const char* FileName, std::istream&);
    ~XMLReader() override;

+    /** @name boost iostream device interface */
+    //@{
+    /// Device category: the reader is usable as a boost::iostreams source.
+    using category = boost::iostreams::source_tag;
+    /// Character type delivered by read().
+    using char_type = char;
+    /** Copy up to @a n bytes of character data into @a s.
+     *  @return the number of bytes copied, or -1 when no character data is
+     *  available because no character stream is active or it has ended.
+     */
+    std::streamsize read(char_type* s, std::streamsize n);
+    //@}
+
    bool isValid() const { return _valid; }
    bool isVerbose() const { return _verbose; }
    void setVerbose(bool on) { _verbose = on; }
@@ -157,6 +166,20 @@ public:
    void readEndElement(const char* ElementName=nullptr, int level=-1);
    /// read until characters are found
    void readCharacters();
+
+    /** Obtain an input stream for reading characters
+     *
+     *  @return An input stream for reading characters. The stream is destroyed
+     *  automatically when you call readElement() or readEndElement(), or you
+     *  can end it explicitly with endCharStream().
+     *  @throws Base::XMLParseException if a character stream is already open or
+     *  the reader is not positioned on a start (or self-closing) element.
+     */
+    std::istream &beginCharStream();
+    /// Manually end the current character stream (no-op if none is open)
+    void endCharStream();
+    /// Obtain the current character stream; throws Base::XMLParseException if none is open
+    std::istream &charStream();
+
    /// read binary file
    void readBinFile(const char*);
    //@}
@@ -259,6 +282,7 @@ protected:
    std::string LocalName;
    std::string Characters;
    unsigned int CharacterCount;
+    /// Read position inside Characters used by read(char_type*, n); negative
+    /// (-1) while no character stream is active or after it has ended.
+    std::streamsize CharacterOffset{-1};

    std::map<std::string,std::string> AttrMap;
    using AttrMapType = std::map<std::string,std::string>;
@@ -285,6 +309,8 @@ protected:
    std::vector<std::string> FileNames;

    std::bitset<32> StatusBits;
+
+    /// Stream created by beginCharStream(); null while no character stream is open.
+    std::unique_ptr<std::istream> CharStream;
 };

 class BaseExport Reader : public std::istream
--- a/src/Base/Writer.cpp
+++ b/src/Base/Writer.cpp
@@ -25,6 +25,7 @@

 #include <limits>
 #include <locale>
+#include <iomanip>

 #include "Writer.h"
 #include "Base64.h"
@@ -34,11 +35,43 @@
 #include "Stream.h"
 #include "Tools.h"

+#include <boost/iostreams/filtering_stream.hpp>
+#include <memory>

 using namespace Base;
 using namespace std;
 using namespace zipios;

+// boost iostream filter to escape ']]>' in text file saved into CDATA section.
+// It does not check if the character is valid utf8 or not.
+//
+// 'state' counts how many consecutive ']' characters (capped at two) have just
+// been written. When a '>' arrives after at least two ']', the current CDATA
+// section is closed and a new one opened before the '>' is emitted, so the
+// terminator never appears inside a single section.
+struct cdata_filter {
+
+    using char_type = char;
+    using category = boost::iostreams::output_filter_tag;
+
+    template<typename Device>
+    inline bool put(Device& dev, char c) {
+        switch(state) {
+            case 0:
+            case 1:
+                if(c == ']')
+                    ++state;
+                else
+                    state = 0;
+                break;
+            case 2:
+                if(c == '>') {
+                    static const char escape[] = "]]><![CDATA[";
+                    boost::iostreams::write(dev,escape,sizeof(escape)-1);
+                    state = 0;
+                }
+                else if(c != ']') {
+                    state = 0;
+                }
+                // Otherwise c is another ']': the last two characters written are
+                // still "]]", so stay in state 2. Resetting here would let an
+                // input such as "]]]>" through unescaped.
+                break;
+        }
+        return boost::iostreams::put(dev,c);
+    }
+
+    int state = 0;
+};

 // ---------------------------------------------------------------------------
 //  Writer: Constructors and Destructor
@@ -55,6 +88,44 @@ Writer::Writer()

 Writer::~Writer() = default;

+// Open a CDATA section on the writer's stream and return an output stream whose
+// content is passed through cdata_filter (which splits any embedded "]]>") before
+// reaching Stream(). Floating point precision is set to digits10 + 1 of double.
+// Throws Base::RuntimeError if a character stream is already open.
+std::ostream& Writer::beginCharStream()
+{
+    if (CharStream) {
+        throw Base::RuntimeError("Writer::beginCharStream(): invalid state");
+    }
+
+    Stream() << "<![CDATA[";
+
+    // Build the stream with its concrete type so no dynamic_cast (and no unchecked
+    // dereference of its result) is needed, then hand it over to CharStream.
+    auto filteredStream = std::make_unique<boost::iostreams::filtering_ostream>();
+    filteredStream->push(cdata_filter());
+    filteredStream->push(Stream());
+    *filteredStream << std::setprecision(std::numeric_limits<double>::digits10 + 1);
+    CharStream = std::move(filteredStream);
+    return *CharStream;
+}
+
+// Close the character stream opened by beginCharStream(), if there is one, and
+// terminate the CDATA section. Returns the writer's normal stream either way.
+std::ostream& Writer::endCharStream()
+{
+    if (!CharStream) {
+        return Stream();
+    }
+
+    // Destroy the filtering stream first, then write the CDATA terminator.
+    CharStream.reset();
+    Stream() << "]]>";
+    return Stream();
+}
+
+// Return the stream created by beginCharStream().
+// Throws Base::RuntimeError when no character stream is currently open.
+std::ostream& Writer::charStream()
+{
+    if (!CharStream) {
+        // The message names this function (it previously said endCharStream()).
+        throw Base::RuntimeError("Writer::charStream(): no current character stream");
+    }
+    return *CharStream;
+}
+
+// Write the given string as CDATA: open a character stream, stream the text
+// through it, then close the stream again.
+void Writer::insertText(const std::string& s)
+{
+    std::ostream& cdataStream = beginCharStream();
+    cdataStream << s;
+    endCharStream();
+}
+
 void Writer::insertAsciiFile(const char* FileName)
 {
    Base::FileInfo fi(FileName);
--- a/src/Base/Writer.h
+++ b/src/Base/Writer.h
@@ -72,6 +72,8 @@ public:
    void insertAsciiFile(const char* FileName);
    /// insert a binary file BASE64 coded as CDATA section in the XML file
    void insertBinFile(const char* FileName);
+    /// insert text string as CDATA (written through beginCharStream()/endCharStream())
+    void insertText(const std::string &s);

    /** @name additional file writing */
    //@{
@@ -115,6 +117,23 @@ public:

    virtual std::ostream &Stream()=0;

+    /** Create an output stream for storing character content
+     * The input is assumed to consist of valid characters in
+     * the current XML encoding, and will be enclosed inside a
+     * CDATA section.  The stream will scan the input and
+     * properly escape any CDATA ending inside.
+     * @return Returns an output stream.
+     * @throws Base::RuntimeError if a character stream is already open.
+     *
+     * You must call endCharStream() to end the current character stream.
+     */
+    std::ostream &beginCharStream();
+    /** End the current character output stream
+     * Does nothing if no character stream is open.
+     * @return Returns the normal writer stream for convenience
+     */
+    std::ostream &endCharStream();
+    /// Return the current character output stream; throws Base::RuntimeError if none is open
+    std::ostream &charStream();
+
    /// name for underlying file saves
    std::string ObjectName;

@@ -138,6 +157,8 @@ protected:
 private:
    Writer(const Writer&);
    Writer& operator=(const Writer&);
+
+    /// Stream created by beginCharStream(); null while no character stream is open.
+    std::unique_ptr<std::ostream> CharStream;
 };


--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -37,6 +37,7 @@ endif()
 add_executable(Tests_run)
 add_subdirectory(lib)
 add_subdirectory(src)
+target_include_directories(Tests_run PUBLIC ${Python3_INCLUDE_DIRS})
 target_link_libraries(Tests_run gtest_main ${Google_Tests_LIBS} FreeCADApp)

 add_executable(Sketcher_tests_run)
--- a/tests/src/App/CMakeLists.txt
+++ b/tests/src/App/CMakeLists.txt
@@ -1,6 +1,7 @@
 target_sources(
    Tests_run
        PRIVATE
+            ${CMAKE_CURRENT_SOURCE_DIR}/Application.cpp
            ${CMAKE_CURRENT_SOURCE_DIR}/Branding.cpp
            ${CMAKE_CURRENT_SOURCE_DIR}/Expression.cpp
            ${CMAKE_CURRENT_SOURCE_DIR}/ElementMap.cpp
@@ -9,5 +10,5 @@ target_sources(
            ${CMAKE_CURRENT_SOURCE_DIR}/MappedElement.cpp
            ${CMAKE_CURRENT_SOURCE_DIR}/MappedName.cpp
            ${CMAKE_CURRENT_SOURCE_DIR}/Metadata.cpp
-            ${CMAKE_CURRENT_SOURCE_DIR}/Application.cpp
+            ${CMAKE_CURRENT_SOURCE_DIR}/StringHasher.cpp
 )
--- a/tests/src/App/StringHasher.cpp
+++ b/tests/src/App/StringHasher.cpp
--- a/tests/src/Base/Bitmask.cpp
+++ b/tests/src/Base/Bitmask.cpp
@@ -0,0 +1,30 @@
+// SPDX-License-Identifier: LGPL-2.1-or-later
+
+#include "gtest/gtest.h"
+
+#include <Base/Bitmask.h>
+
+// Scoped enum with the default underlying type (int), used to instantiate Base::Flags.
+enum class TestFlagEnum
+{
+    Flag1,
+    Flag2,
+    Flag3,
+};
+
+// Fixture for the Bitmask tests; it needs no set-up or tear-down.
+class BitmaskTest: public ::testing::Test
+{
+};
+
+// toUnderlyingType() must return the enum's underlying type, which is int here.
+TEST_F(BitmaskTest, toUnderlyingType)
+{
+    // Arrange
+    Base::Flags<TestFlagEnum> flags {TestFlagEnum::Flag1};
+
+    // Act
+    const auto underlying = flags.toUnderlyingType();
+
+    // Assert
+    EXPECT_EQ(typeid(underlying), typeid(int));
+}
--- a/tests/src/Base/CMakeLists.txt
+++ b/tests/src/Base/CMakeLists.txt
@@ -1,9 +1,12 @@
 target_sources(
    Tests_run
        PRIVATE
+            ${CMAKE_CURRENT_SOURCE_DIR}/Bitmask.cpp
            ${CMAKE_CURRENT_SOURCE_DIR}/Matrix.cpp
-            ${CMAKE_CURRENT_SOURCE_DIR}/Rotation.cpp
-            ${CMAKE_CURRENT_SOURCE_DIR}/tst_Tools.cpp
-            ${CMAKE_CURRENT_SOURCE_DIR}/Unit.cpp
            ${CMAKE_CURRENT_SOURCE_DIR}/Quantity.cpp
+            ${CMAKE_CURRENT_SOURCE_DIR}/Reader.cpp
+            ${CMAKE_CURRENT_SOURCE_DIR}/Rotation.cpp
+            ${CMAKE_CURRENT_SOURCE_DIR}/Unit.cpp
+            ${CMAKE_CURRENT_SOURCE_DIR}/Writer.cpp
+            ${CMAKE_CURRENT_SOURCE_DIR}/tst_Tools.cpp
 )
--- a/tests/src/Base/Reader.cpp
+++ b/tests/src/Base/Reader.cpp
@@ -0,0 +1,212 @@
+// SPDX-License-Identifier: LGPL-2.1-or-later
+
+#include "gtest/gtest.h"
+
+#include "Base/Exception.h"
+#include "Base/Reader.h"
+#include <array>
+#include <filesystem>
+#include <fmt/format.h>
+#include <fstream>
+
+namespace fs = std::filesystem;
+
+/// Fixture that writes an XML document to a temporary file and opens a
+/// Base::XMLReader on it.
+class ReaderTest: public ::testing::Test
+{
+protected:
+    void SetUp() override
+    {
+        // Use the Xerces namespace macro rather than a hard-coded versioned
+        // namespace so the test builds against any Xerces-C release.
+        XERCES_CPP_NAMESPACE::XMLPlatformUtils::Initialize();
+        _tempDir = fs::temp_directory_path();
+        std::string filename = "unit_test_Reader.xml";
+        _tempFile = _tempDir / filename;
+    }
+
+    void TearDown() override
+    {
+        // Release the reader and close the file before deleting it.
+        closeReader();
+        if (fs::exists(_tempFile)) {
+            fs::remove(_tempFile);
+        }
+    }
+
+    /// Wrap @a data in a <document> root element, write it to the temporary
+    /// file and create the reader returned by Reader() on that file.
+    void givenDataAsXMLStream(const std::string& data)
+    {
+        closeReader();
+
+        auto stringData =
+            R"(<?xml version="1.0" encoding="UTF-8"?><document>)" + data + "</document>";
+        std::ofstream fileStream(_tempFile);
+        fileStream.write(stringData.data(), static_cast<std::streamsize>(stringData.length()));
+        fileStream.close();
+
+        // XMLReader receives the stream by reference and the tests keep using
+        // the reader after this function returns, so the stream is a member
+        // instead of a local (presumably the reader reads from it lazily).
+        _inputStream.open(_tempFile);
+        _reader = std::make_unique<Base::XMLReader>(_tempFile.string().c_str(), _inputStream);
+    }
+
+    Base::XMLReader* Reader()
+    {
+        return _reader.get();
+    }
+
+private:
+    /// Destroy the reader (if any) and return the input stream to a clean, closed state.
+    void closeReader()
+    {
+        _reader.reset();
+        if (_inputStream.is_open()) {
+            _inputStream.close();
+        }
+        _inputStream.clear();
+    }
+
+    // Declared before _reader so that it is destroyed after the reader.
+    std::ifstream _inputStream;
+    std::unique_ptr<Base::XMLReader> _reader;
+    fs::path _tempDir;
+    fs::path _tempFile;
+};
+
+// beginCharStream() on an element with text content returns a stream in good state.
+TEST_F(ReaderTest, beginCharStreamNormal)
+{
+    // Arrange
+    givenDataAsXMLStream("<data>Test ASCII data</data>");
+    auto* reader = Reader();
+    reader->readElement("data");
+
+    // Act
+    std::istream& charStream = reader->beginCharStream();
+
+    // Assert
+    EXPECT_TRUE(charStream.good());
+}
+
+TEST_F(ReaderTest, beginCharStreamOpenClose)
+{
+    // Arrange
+    givenDataAsXMLStream("<data id='12345' />");
+    Reader()->readElement("data");
+
+    // Act
+    auto& result = Reader()->beginCharStream();// Not an error, even though there is no data
+
+    // Assert
+    EXPECT_TRUE(result.good());
+}
+
+// Opening a second character stream while one is already open must throw.
+TEST_F(ReaderTest, beginCharStreamAlreadyBegun)
+{
+    // Arrange
+    givenDataAsXMLStream("<data>Test ASCII data</data>");
+    auto* reader = Reader();
+    reader->readElement("data");
+    reader->beginCharStream();
+
+    // Act & Assert
+    EXPECT_THROW(reader->beginCharStream(), Base::XMLParseException);
+}
+
+// charStream() hands back a usable stream once beginCharStream() has been called.
+TEST_F(ReaderTest, charStreamGood)
+{
+    // Arrange
+    givenDataAsXMLStream("<data>Test ASCII data</data>");
+    auto* reader = Reader();
+    reader->readElement("data");
+    reader->beginCharStream();
+
+    // Act
+    std::istream& current = reader->charStream();
+
+    // Assert
+    EXPECT_TRUE(current.good());
+}
+
+// charStream() without a prior beginCharStream() must throw.
+TEST_F(ReaderTest, charStreamBad)
+{
+    // Arrange
+    givenDataAsXMLStream("<data>Test ASCII data</data>");
+    auto* reader = Reader();
+    reader->readElement("data");
+
+    // Act & Assert
+    EXPECT_THROW(reader->charStream(), Base::XMLParseException);
+}
+
+// Ending an open character stream completes without throwing.
+TEST_F(ReaderTest, endCharStreamGood)
+{
+    // Arrange
+    givenDataAsXMLStream("<data>Test ASCII data</data>");
+    auto* reader = Reader();
+    reader->readElement("data");
+    reader->beginCharStream();
+
+    // Act & Assert: the call itself is the check, it must not throw
+    reader->endCharStream();
+}
+
+// Ending a character stream that was never opened is tolerated.
+TEST_F(ReaderTest, endCharStreamBad)
+{
+    // Arrange: read the element but deliberately do not open the stream
+    givenDataAsXMLStream("<data>Test ASCII data</data>");
+    auto* reader = Reader();
+    reader->readElement("data");
+
+    // Act & Assert: must not throw, even with no open stream
+    reader->endCharStream();
+}
+
+// With a buffer larger than the content, a single read() delivers all of it.
+TEST_F(ReaderTest, readDataSmallerThanBuffer)
+{
+    // Arrange
+    constexpr size_t bufferSize {20};
+    const std::string expectedData {"Test ASCII data"};
+    givenDataAsXMLStream("<data>" + expectedData + "</data>");
+    auto* reader = Reader();
+    reader->readElement("data");
+    reader->beginCharStream();
+    std::array<char, bufferSize> buffer {};  // zero-filled, so the result stays NUL-terminated
+
+    // Act
+    auto bytesRead = reader->read(buffer.data(), bufferSize);
+
+    // Assert
+    EXPECT_STREQ(expectedData.c_str(), buffer.data());
+    EXPECT_EQ(expectedData.length(), bytesRead);
+}
+
+// With a buffer smaller than the content, read() fills the buffer with the leading bytes.
+TEST_F(ReaderTest, readDataLargerThanBuffer)
+{
+    // Arrange
+    constexpr size_t bufferSize {5};
+    const std::string expectedData {"Test ASCII data"};
+    givenDataAsXMLStream("<data>" + expectedData + "</data>");
+    auto* reader = Reader();
+    reader->readElement("data");
+    reader->beginCharStream();
+    std::array<char, bufferSize> buffer {};
+
+    // Act
+    auto bytesRead = reader->read(buffer.data(), bufferSize);
+
+    // Assert
+    for (size_t pos = 0; pos < bufferSize; ++pos) {
+        EXPECT_EQ(expectedData[pos], buffer.at(pos));
+    }
+    EXPECT_EQ(bufferSize, bytesRead);
+}
+
+// A second read() continues where the first one stopped.
+TEST_F(ReaderTest, readDataLargerThanBufferSecondRead)
+{
+    // Arrange
+    constexpr size_t bufferSize {5};
+    const std::string expectedData {"Test ASCII data"};
+    givenDataAsXMLStream("<data>" + expectedData + "</data>");
+    auto* reader = Reader();
+    reader->readElement("data");
+    reader->beginCharStream();
+    std::array<char, bufferSize> buffer {};
+    reader->read(buffer.data(), bufferSize);  // consume the first five bytes
+
+    // Act
+    auto bytesRead = reader->read(buffer.data(), bufferSize);  // the next five bytes
+
+    // Assert
+    for (size_t pos = 0; pos < bufferSize; ++pos) {
+        EXPECT_EQ(expectedData[pos + bufferSize], buffer.at(pos));
+    }
+    EXPECT_EQ(bufferSize, bytesRead);
+}
+
+
+// read() reports -1 when beginCharStream() was never called.
+TEST_F(ReaderTest, readDataNotStarted)
+{
+    // Arrange
+    constexpr size_t bufferSize {20};
+    const std::string expectedData {"Test ASCII data"};
+    givenDataAsXMLStream("<data>" + expectedData + "</data>");
+    auto* reader = Reader();
+    reader->readElement("data");
+    std::array<char, bufferSize> buffer {};
+
+    // Act
+    auto bytesRead = reader->read(buffer.data(), bufferSize);
+
+    // Assert: no character stream was opened, so nothing can be read
+    EXPECT_EQ(-1, bytesRead);
+}
--- a/tests/src/Base/Writer.cpp
+++ b/tests/src/Base/Writer.cpp
@@ -0,0 +1,118 @@
+// SPDX-License-Identifier: LGPL-2.1-or-later
+
+#include "gtest/gtest.h"
+
+#include "Base/Exception.h"
+#include "Base/Writer.h"
+
+// Writer is designed to be a base class, so for testing we actually instantiate a StringWriter,
+// which is derived from it
+
+// Fixture owning the StringWriter under test; it needs no set-up or tear-down.
+class WriterTest: public ::testing::Test
+{
+protected:
+    Base::StringWriter _writer;
+};
+
+// Plain text is wrapped in exactly one CDATA section.
+TEST_F(WriterTest, insertTextSimple)
+{
+    // Arrange
+    const std::string text {"Simple ASCII data"};
+    const std::string expected {"<![CDATA[" + text + "]]>"};
+
+    // Act
+    _writer.insertText(text);
+
+    // Assert
+    EXPECT_EQ(expected, _writer.getString());
+}
+
+/// Text containing the CDATA close marker must be "escaped": the writer splits the
+/// marker across two consecutive CDATA sections.
+TEST_F(WriterTest, insertTextNeedsEscape)
+{
+    // Arrange
+    const std::string beforeSplit {"ASCII data with a close marker in it, like so: ]]"};
+    const std::string afterSplit {"> "};
+    const std::string expected {"<![CDATA[" + beforeSplit + "]]><![CDATA[" + afterSplit + "]]>"};
+
+    // Act
+    _writer.insertText(beforeSplit + afterSplit);
+
+    // Assert
+    EXPECT_EQ(expected, _writer.getString());
+}
+
+// Control characters and non-ASCII code points pass through the CDATA writer unchanged.
+TEST_F(WriterTest, insertNonAsciiData)
+{
+    // Arrange
+    // \U (eight hex digits) is required for code points above U+FFFF; the previous
+    // "\u0001F450" spelled U+0001 followed by the literal text "F450".
+    std::string testData {"\x01\x02\x03\x04\U0001F450😀"};
+    std::string expectedResult {"<![CDATA[" + testData + "]]>"};
+
+    // Act
+    _writer.insertText(testData);
+
+    // Assert
+    EXPECT_EQ(expectedResult, _writer.getString());
+}
+
+// A freshly opened character stream is in good state.
+TEST_F(WriterTest, beginCharStream)
+{
+    // Arrange & Act
+    std::ostream& opened = _writer.beginCharStream();
+
+    // Assert
+    EXPECT_TRUE(opened.good());
+}
+
+// Opening a second character stream while one is open must throw.
+TEST_F(WriterTest, beginCharStreamTwice)
+{
+    // Arrange
+    _writer.beginCharStream();
+
+    // Act & Assert
+    EXPECT_THROW(_writer.beginCharStream(), Base::RuntimeError);
+}
+
+// Opening and immediately closing a character stream yields an empty CDATA section.
+TEST_F(WriterTest, endCharStream)
+{
+    // Arrange
+    _writer.beginCharStream();
+
+    // Act
+    _writer.endCharStream();
+
+    // Assert
+    const auto written = _writer.getString();
+    EXPECT_EQ("<![CDATA[]]>", written);
+}
+
+// A second endCharStream() neither throws nor writes anything more.
+TEST_F(WriterTest, endCharStreamTwice)
+{
+    // Arrange
+    _writer.beginCharStream();
+    _writer.endCharStream();
+
+    // Act
+    _writer.endCharStream();
+
+    // Assert
+    const auto written = _writer.getString();
+    EXPECT_EQ("<![CDATA[]]>", written);
+}
+
+// charStream() returns the very stream object that beginCharStream() created.
+TEST_F(WriterTest, charStream)
+{
+    // Arrange
+    std::ostream& opened = _writer.beginCharStream();
+
+    // Act
+    std::ostream& current = _writer.charStream();
+
+    // Assert
+    EXPECT_EQ(&opened, &current);
+}