From 0a2eacade2f7cfda78f8abec1feebadd95dd01f5 Mon Sep 17 00:00:00 2001 From: Chris Hennes Date: Fri, 31 Mar 2023 14:23:42 -0500 Subject: [PATCH] App/Toponaming: Lint and compilation cleanup --- src/App/StringHasher.cpp | 1381 ++++++++++++++++++++----------------- src/App/StringHasher.h | 1195 +++++++++++++++++--------------- src/App/StringIDPyImp.cpp | 33 +- src/Base/Writer.cpp | 28 +- tests/src/Base/Reader.cpp | 3 + tests/src/Base/Writer.cpp | 3 + 6 files changed, 1424 insertions(+), 1219 deletions(-) create mode 100644 tests/src/Base/Reader.cpp create mode 100644 tests/src/Base/Writer.cpp diff --git a/src/App/StringHasher.cpp b/src/App/StringHasher.cpp index 879bc1b652..7fb8f3a83d 100644 --- a/src/App/StringHasher.cpp +++ b/src/App/StringHasher.cpp @@ -1,32 +1,34 @@ -/**************************************************************************** -* Copyright (c) 2022 Zheng Lei (realthunder) * -* * -* This file is part of the FreeCAD CAx development system. * -* * -* This library is free software; you can redistribute it and/or * -* modify it under the terms of the GNU Library General Public * -* License as published by the Free Software Foundation; either * -* version 2 of the License, or (at your option) any later version. * -* * -* This library is distributed in the hope that it will be useful, * -* but WITHOUT ANY WARRANTY; without even the implied warranty of * -* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * -* GNU Library General Public License for more details. * -* * -* You should have received a copy of the GNU Library General Public * -* License along with this library; see the file COPYING.LIB. 
If not, * -* write to the Free Software Foundation, Inc., 59 Temple Place, * -* Suite 330, Boston, MA 02111-1307, USA * -* * -****************************************************************************/ +// SPDX-License-Identifier: LGPL-2.1-or-later + +/*************************************************************************************************** + * * + * Copyright (c) 2022 Zheng, Lei (realthunder) * + * Copyright (c) 2023 FreeCAD Project Association * + * * + * This file is part of FreeCAD. * + * * + * FreeCAD is free software: you can redistribute it and/or modify it under the terms of the * + * GNU Lesser General Public License as published by the Free Software Foundation, either * + * version 2.1 of the License, or (at your option) any later version. * + * * + * FreeCAD is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; * + * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * + * See the GNU Lesser General Public License for more details. * + * * + * You should have received a copy of the GNU Lesser General Public License along with * + * FreeCAD. If not, see . 
* + * * + **************************************************************************************************/ #include "PreCompiled.h" -#include #include #include +#include +#include #include +#include #include #include @@ -36,48 +38,50 @@ #include #include +#include "MappedElement.h" #include "StringHasher.h" #include "StringHasherPy.h" -#include "DocumentParams.h" -#include "MappedElement.h" #include "StringIDPy.h" -FC_LOG_LEVEL_INIT("App",true,true) +FC_LOG_LEVEL_INIT("App", true, true) namespace bio = boost::iostreams; using namespace App; /////////////////////////////////////////////////////////// -struct StringIDHasher { - std::size_t operator()(const StringID *sid) const { - if (!sid) - return 0; - return qHash(sid->data(), qHash(sid->postfix())); - } +struct StringIDHasher +{ + std::size_t operator()(const StringID* sid) const + { + if (!sid) { + return 0; + } + return qHash(sid->data(), qHash(sid->postfix())); + } - bool operator()(const StringID *IDa, const StringID *IDb) const { - if (IDa == IDb) - return true; - if (!IDa || !IDb) - return false; - return IDa->data() == IDb->data() && IDa->postfix() == IDb->postfix(); - } + bool operator()(const StringID* IDa, const StringID* IDb) const + { + if (IDa == IDb) { + return true; + } + if (!IDa || !IDb) { + return false; + } + return IDa->data() == IDb->data() && IDa->postfix() == IDb->postfix(); + } }; -typedef boost::bimap< - boost::bimaps::unordered_set_of, - boost::bimaps::set_of> - HashMapBase; +using HashMapBase = + boost::bimap, + boost::bimaps::set_of>; class StringHasher::HashMap: public HashMapBase { public: - bool SaveAll = false; - int Threshold = 0; + bool SaveAll = false; + int Threshold = 0; }; /////////////////////////////////////////////////////////// @@ -86,68 +90,81 @@ TYPESYSTEM_SOURCE_ABSTRACT(App::StringID, Base::BaseClass) StringID::~StringID() { - if (_hasher) - _hasher->_hashes->right.erase(_id); + if (_hasher) { + _hasher->_hashes->right.erase(_id); + } } -PyObject 
*StringID::getPyObject() { - return new StringIDPy(this); +PyObject* StringID::getPyObject() +{ + return new StringIDPy(this); } -PyObject *StringID::getPyObjectWithIndex(int index) { - auto res = new StringIDPy(this); - res->_index = index; - return res; +PyObject* StringID::getPyObjectWithIndex(int index) +{ + auto* res = new StringIDPy(this); + res->_index = index; + return res; } -std::string StringID::toString(int index) const { - std::ostringstream ss; - ss << '#' << std::hex << value(); - if (index) - ss << ':' << index; - return ss.str(); +std::string StringID::toString(int index) const +{ + std::ostringstream ss; + ss << '#' << std::hex << value(); + if (index != 0) { + ss << ':' << index; + } + return ss.str(); } -StringID::IndexID StringID::fromString(const char *name, bool eof, int size) { - IndexID res; - res.id = 0; - res.index = 0; - if (!name) { - res.id = -1; - return res; - } - if (size < 0) - size = std::strlen(name); - bio::stream iss(name, size); - char sep = 0; - char sep2 = 0; - iss >> sep >> std::hex >> res.id >> sep2 >> res.index; - if ((eof && !iss.eof()) || sep != '#' || (sep2 != 0 && sep2 != ':')) { - res.id = -1; - return res; - } - return res; +StringID::IndexID StringID::fromString(const char* name, bool eof, int size) +{ + IndexID res {}; + res.id = 0; + res.index = 0; + if (!name) { + res.id = -1; + return res; + } + if (size < 0) { + size = static_cast(std::strlen(name)); + } + bio::stream iss(name, size); + char sep = 0; + char sep2 = 0; + iss >> sep >> std::hex >> res.id >> sep2 >> res.index; + if ((eof && !iss.eof()) || sep != '#' || (sep2 != 0 && sep2 != ':')) { + res.id = -1; + return res; + } + return res; } -std::string StringID::dataToText(int index) const { - if(isHashed() || isBinary()) - return _data.toBase64().constData(); +std::string StringID::dataToText(int index) const +{ + if (isHashed() || isBinary()) { + return _data.toBase64().constData(); + } - std::string res(_data.constData()); - if (index) - res += 
std::to_string(index); - if (_postfix.size()) - res += _postfix.constData(); - return res; + std::string res(_data.constData()); + if (index != 0) { + res += std::to_string(index); + } + if (_postfix.size() != 0) { + res += _postfix.constData(); + } + return res; } void StringID::mark() const { - if (isMarked()) - return; - _flags.setFlag(Flag::Marked); - for (auto &sid : _sids) - sid.deref().mark(); + if (isMarked()) { + return; + } + _flags.setFlag(Flag::Marked); + for (auto& sid : _sids) { + sid.deref().mark(); + } } /////////////////////////////////////////////////////////// @@ -155,595 +172,667 @@ void StringID::mark() const TYPESYSTEM_SOURCE(App::StringHasher, Base::Persistence) StringHasher::StringHasher() - :_hashes(new HashMap) + : _hashes(new HashMap) {} -StringHasher::~StringHasher() { - clear(); +StringHasher::~StringHasher() +{ + clear(); } -void StringHasher::setSaveAll(bool enable) { - if (_hashes->SaveAll == enable) - return; - _hashes->SaveAll = enable; - compact(); +void StringHasher::setSaveAll(bool enable) +{ + if (_hashes->SaveAll == enable) { + return; + } + _hashes->SaveAll = enable; + compact(); } void StringHasher::compact() { - if (_hashes->SaveAll) - return; + if (_hashes->SaveAll) { + return; + } - std::deque pendings; - for (auto &v : _hashes->right) { - if (!v.second->isPersistent() && v.second->getRefCount() == 1) - pendings.emplace_back(v.second); - } - while (pendings.size()) { - StringIDRef sid = pendings.front(); - pendings.pop_front(); - if (!_hashes->right.erase(sid.value())) - continue; - sid._sid->_hasher = nullptr; - sid._sid->unref(); - for (auto &s : sid._sid->_sids) { - if (s._sid->_hasher == this - && !s._sid->isPersistent() - && s._sid->getRefCount() == 2) - pendings.push_back(s); - } - } + std::deque pendings; + for (auto& hasher : _hashes->right) { + if (!hasher.second->isPersistent() && hasher.second->getRefCount() == 1) { + pendings.emplace_back(hasher.second); + } + } + while (!pendings.empty()) { + StringIDRef sid 
= pendings.front(); + pendings.pop_front(); + if (_hashes->right.erase(sid.value()) == 0U) { + continue; + } + sid._sid->_hasher = nullptr; + sid._sid->unref(); + for (auto& hasher : sid._sid->_sids) { + if (hasher._sid->_hasher == this && !hasher._sid->isPersistent() + && hasher._sid->getRefCount() == 2) { + pendings.push_back(hasher); + } + } + } } -bool StringHasher::getSaveAll() const { - return _hashes->SaveAll; -} - -void StringHasher::setThreshold(int threshold) { - _hashes->Threshold = threshold; -} - -int StringHasher::getThreshold() const { - return _hashes->Threshold; -} - -long StringHasher::lastID() const { - if(_hashes->right.empty()) - return 0; - auto it = _hashes->right.end(); - --it; - return it->first; -} - -StringIDRef StringHasher::getID(const char *text, int len, bool hashable) { - if (len < 0) - len = strlen(text); - return getID(QByteArray::fromRawData(text, len), hashable ? Option::Hashable : Option::None); -} - -StringIDRef StringHasher::getID(const QByteArray &data, Options options) +bool StringHasher::getSaveAll() const { - bool binary = options.testFlag(Option::Binary); - bool hashable = options.testFlag(Option::Hashable); - bool nocopy = options.testFlag(Option::NoCopy); - - bool hashed = hashable && _hashes->Threshold > 0 - && (int)data.size() > _hashes->Threshold; - - StringID dataID; - if (hashed) { - QCryptographicHash hasher(QCryptographicHash::Sha1); - hasher.addData(data); - dataID._data = hasher.result(); - } - else - dataID._data = data; - - auto it = _hashes->left.find(&dataID); - if (it != _hashes->left.end()) - return StringIDRef(it->first); - - if (!hashed && !nocopy) - // if not hashed, make a deep copy of the data - dataID._data = QByteArray(data.constData(), data.size()); - - StringID::Flags flags(StringID::Flag::None); - if (binary) - flags.setFlag(StringID::Flag::Binary); - if (hashed) - flags.setFlag(StringID::Flag::Hashed); - StringIDRef sid(new StringID(lastID() + 1, dataID._data, flags)); - return 
StringIDRef(insert(sid)); + return _hashes->SaveAll; } -StringIDRef StringHasher::getID(const Data::MappedName &name, - const QVector & sids) +void StringHasher::setThreshold(int threshold) { - StringID anID; - anID._postfix = name.postfixBytes(); - - Data::IndexedName indexed; - if (!anID._postfix.size()) - indexed = Data::IndexedName(name.dataBytes()); - if (indexed) - anID._data = QByteArray::fromRawData(indexed.getType(), strlen(indexed.getType())); - else - anID._data = name.dataBytes(); - - auto it = _hashes->left.find(&anID); - if (it != _hashes->left.end()) { - auto res = StringIDRef(it->first); - if (indexed) - res._index = indexed.getIndex(); - return res; - } - - if (!indexed && name.isRaw()) - anID._data = QByteArray(name.dataBytes().constData(), - name.dataBytes().size()); - - StringIDRef postfixRef; - if (anID._postfix.size() && anID._postfix.indexOf("#") < 0) { - postfixRef = getID(anID._postfix); - postfixRef.toBytes(anID._postfix); - } - - StringIDRef indexRef; - if (indexed) - indexRef = getID(anID._data); - - StringIDRef sid(new StringID(lastID() + 1, anID._data)); - StringID &id = *sid._sid; - if (anID._postfix.size()) { - id._flags.setFlag(StringID::Flag::Postfixed); - id._postfix = anID._postfix; - } - - int count = 0; - for (auto &sid : sids) { - if (sid && sid._sid->_hasher == this) - ++count; - } - - int extra = (postfixRef ? 1 : 0) + (indexRef ? 
1 : 0); - if (count == sids.size() && !postfixRef && !indexRef) - id._sids = sids; - else { - id._sids.reserve(count + extra); - if (postfixRef) { - id._flags.setFlag(StringID::Flag::PostfixEncoded); - id._sids.push_back(postfixRef); - } - if (indexRef) { - id._flags.setFlag(StringID::Flag::Indexed); - id._sids.push_back(indexRef); - } - for (auto &s : sids) { - if (s && s._sid->_hasher == this) - id._sids.push_back(s); - } - } - if (id._sids.size() > 10) { - std::sort(id._sids.begin() + extra, id._sids.end()); - id._sids.erase(std::unique(id._sids.begin() + extra, id._sids.end()), id._sids.end()); - } - - if (id._postfix.size() && !indexed) { - StringID::IndexID res = StringID::fromString(id._data); - if (res.id > 0) { - int offset = id.isPostfixEncoded() ? 1 : 0; - for (int i = offset; i < id._sids.size(); ++i) { - if (id._sids[i].value() == res.id) { - if (i!=offset) - std::swap(id._sids[offset], id._sids[i]); - if (res.index != 0) - id._flags.setFlag(StringID::Flag::PrefixIDIndex); - else - id._flags.setFlag(StringID::Flag::PrefixID); - break; - } - } - } - } - - return StringIDRef(insert(sid), indexed.getIndex()); + _hashes->Threshold = threshold; } -StringIDRef StringHasher::getID(long id, int index) const { - if (id<=0) - return StringIDRef(); - auto it = _hashes->right.find(id); - if (it == _hashes->right.end()) - return StringIDRef(); - StringIDRef res(it->second); - res._index = index; - return res; -} - -void StringHasher::setPersistenceFileName(const char *filename) const { - if (!filename) - filename = ""; - _filename = filename; -} - -const std::string &StringHasher::getPersistenceFileName() const { - return _filename; -} - -void StringHasher::Save(Base::Writer &writer) const { - - size_t count; - if (_hashes->SaveAll) - count = _hashes->size(); - else { - count = 0; - for (auto &v : _hashes->right) { - if (v.second->isMarked() || v.second->isPersistent()) - ++count; - } - } - - writer.Stream() << writer.ind() - << "SaveAll - << "\" threshold=\"" << 
_hashes->Threshold << "\""; - - if (!count) { - writer.Stream() << " count=\"0\">\n"; - return; - } - - writer.Stream() << " count=\"0\" new=\"1\"/>\n"; - - writer.Stream() << writer.ind() << "\n"; - return; - } - - writer.Stream() << " count=\"" << count << "\">\n"; - saveStream(writer.beginCharStream(false) << '\n'); - writer.endCharStream() << '\n'; - writer.Stream() << writer.ind() << "\n"; -} - -void StringHasher::SaveDocFile(Base::Writer &writer) const { - std::size_t count = _hashes->SaveAll?this->size():this->count(); - writer.Stream() << "StringTableStart v1 " << count << '\n'; - saveStream(writer.Stream()); -} - -void StringHasher::saveStream(std::ostream &s) const { - Base::OutputStream str(s,false); - boost::io::ios_flags_saver ifs(s); - s << std::hex; - - bool allowRealtive = DocumentParams::getRelativeStringID(); - long anchor = 0; - const StringID *last = nullptr; - long lastid = 0; - bool relative = false; - - for (auto &v : _hashes->right) { - auto &d = *v.second; - long id = d._id; - if (!_hashes->SaveAll && !d.isMarked() && !d.isPersistent()) - continue; - - if (!allowRealtive) - s << id; - else { - // We use relative coding to save space. But in order to have some - // minimum protection against corruption, write an absolute value every - // once a while. - relative = (id - anchor) < 1000; - if (relative) - s << '-' << id - lastid; - else { - anchor = id; - s << id; - } - lastid = id; - } - - int offset = d.isPostfixEncoded() ? 1 : 0; - - StringID::IndexID prefixid; - prefixid.id = 0; - prefixid.index = 0; - if (d.isPrefixID()) { - assert(d._sids.size() > offset); - prefixid.id = d._sids[offset].value(); - } - else if (d.isPrefixIDIndex()) { - prefixid = StringID::fromString(d._data); - assert(d._sids.size() > offset && d._sids[offset].value() == prefixid.id); - } - - auto flags = d._flags; - flags.setFlag(StringID::Flag::Marked, false); - s << '.' 
<< flags.toUnderlyingType(); - - int i = 0; - if (!relative) { - for (; i < d._sids.size(); ++i) - s << '.' << d._sids[i].value(); - } - else { - if (last) { - for (; i < d._sids.size() && i < last->_sids.size(); ++i) { - long m = last->_sids[i].value(); - long n = d._sids[i].value(); - if (n < m) - s << ".-" << m-n; - else - s << '.' << n - m; - } - } - for (; i < d._sids.size(); ++i) - s << '.' << id - d._sids[i].value(); - } - - last = &d; - - // Having postfix means it is a geometry element name, which - // guarantees to be a single line without space. So it is safe to - // store in raw stream. - if (d.isPostfixed()) { - if (d.isPrefixIDIndex()) - s << ' ' << prefixid.index; - else if (!d.isIndexed() && !d.isPrefixID()) - s << ' ' << d._data.constData(); - - if (!d.isPostfixEncoded()) - s << ' ' << d._postfix.constData(); - s << '\n'; - } - else { - // Reaching here means the string may contain space and newlines - // We rely on OutputStream (i.e. str) to save the string. - s << ' '; - str << d._data.constData(); - } - } -} - -void StringHasher::RestoreDocFile(Base::Reader &reader) { - std::string marker, ver; - reader >> marker; - std::size_t count; - _hashes->clear(); - if (marker == "StringTableStart") { - reader >> ver >> count; - if (ver != "v1") - FC_WARN("Unknown string table format"); - restoreStreamNew(reader, count); - return; - } - count = atoi(marker.c_str()); - restoreStream(reader, count); -} - -void StringHasher::restoreStreamNew(std::istream &s, std::size_t count) { - Base::InputStream str(s, false); - _hashes->clear(); - std::string content; - boost::io::ios_flags_saver ifs(s); - s >> std::hex; - std::vector tokens; - long lastid = 0; - const StringID *last = nullptr; - - std::string tmp; - - for (uint32_t i = 0; i < count; ++i) { - if (!(s >> tmp)) - FC_THROWM(Base::RuntimeError, "Invalid string table"); - - tokens.clear(); - boost::split(tokens, tmp, boost::is_any_of(".")); - if (tokens.size() < 2) - FC_THROWM(Base::RuntimeError, "Invalid 
string table"); - - long id; - bool relative = false; - if (tokens[0][0] == '-') { - relative = true; - id = lastid + strtol(tokens[0].c_str() + 1, nullptr, 16); - } - else - id = strtol(tokens[0].c_str(), nullptr, 16); - - lastid = id; - - unsigned long flag = strtol(tokens[1].c_str(), nullptr, 16); - StringIDRef sid(new StringID(id, QByteArray(), static_cast(flag))); - - StringID &d = *sid._sid; - d._sids.reserve(tokens.size() - 2); - - int j = 2; - if (relative && last) { - for (; j < (int)tokens.size() && j - 2 < last->_sids.size(); ++j) { - long m = last->_sids[j - 2].value(); - long n; - if (tokens[j][0] == '-') - n = -strtol(&tokens[j][1], nullptr, 16); - else - n = strtol(&tokens[j][0], nullptr, 16); - StringIDRef sid = getID(m + n); - if (!sid) - FC_THROWM(Base::RuntimeError, "Invalid string id reference"); - d._sids.push_back(sid); - } - } - for (; j < (int)tokens.size(); ++j) { - long n = strtol(&tokens[j][0], nullptr, 16); - StringIDRef sid = getID(relative ? id - n : n); - if (!sid) - FC_THROWM(Base::RuntimeError, "Invalid string id reference"); - d._sids.push_back(sid); - } - - if (!d.isPostfixed()) { - str >> content; - if (d.isHashed() || d.isBinary()) - d._data = QByteArray::fromBase64(content.c_str()); - else - d._data = content.c_str(); - } - else { - int offset = 0; - if (d.isPostfixEncoded()) { - offset = 1; - if (d._sids.empty()) - FC_THROWM(Base::RuntimeError, "Missing string postfix"); - d._postfix = d._sids[0]._sid->_data; - } - if (d.isIndexed()) { - if (d._sids.size() <= offset) - FC_THROWM(Base::RuntimeError, "Missing string prefix"); - d._data = d._sids[offset]._sid->_data; - } - else if (d.isPrefixID() || d.isPrefixIDIndex()) { - if (d._sids.size() <= offset) - FC_THROWM(Base::RuntimeError, "Missing string prefix id"); - int index = 0; - if (d.isPrefixIDIndex()) { - if (!(s >> index)) - FC_THROWM(Base::RuntimeError, "Missing string prefix index"); - } - d._data = d._sids[offset]._sid->toString(index).c_str(); - } - else { - s >> 
content; - d._data = content.c_str(); - } - if (!d.isPostfixEncoded()) { - s >> content; - d._postfix = content.c_str(); - } - } - - last = insert(sid); - } -} - -StringID *StringHasher::insert(const StringIDRef &sid) +int StringHasher::getThreshold() const { - assert(sid && sid._sid->_hasher == nullptr); - auto &d = *sid._sid; - d._hasher = this; - d.ref(); - auto res = _hashes->right.insert(_hashes->right.end(), - HashMap::right_map::value_type(sid.value(), &d)); - if (res->second != &d) { - d._hasher = nullptr; - d.unref(); - } - return res->second; + return _hashes->Threshold; } -void StringHasher::restoreStream(std::istream &s, std::size_t count) { - Base::InputStream str(s, false); - _hashes->clear(); - std::string content; - for (uint32_t i = 0; i < count; ++i) { - int32_t id; - uint8_t type; - str >> id >> type >> content; - StringIDRef sid = new StringID(id, QByteArray(), static_cast(type)); - if (sid.isHashed() || sid.isBinary()) - sid._sid->_data = QByteArray::fromBase64(content.c_str()); - else - sid._sid->_data = QByteArray(content.c_str()); - insert(sid); - } +long StringHasher::lastID() const +{ + if (_hashes->right.empty()) { + return 0; + } + auto it = _hashes->right.end(); + --it; + return it->first; } -void StringHasher::clear() { - for (auto & v : _hashes->right) { - v.second->_hasher = nullptr; - v.second->unref(); - } - _hashes->clear(); +StringIDRef StringHasher::getID(const char* text, int len, bool hashable) +{ + if (len < 0) { + len = static_cast(strlen(text)); + } + return getID(QByteArray::fromRawData(text, len), hashable ? 
Option::Hashable : Option::None); } -size_t StringHasher::size() const { - return _hashes->size(); +StringIDRef StringHasher::getID(const QByteArray& data, Options options) +{ + bool binary = options.testFlag(Option::Binary); + bool hashable = options.testFlag(Option::Hashable); + bool nocopy = options.testFlag(Option::NoCopy); + + bool hashed = hashable && _hashes->Threshold > 0 && (int)data.size() > _hashes->Threshold; + + StringID dataID; + if (hashed) { + QCryptographicHash hasher(QCryptographicHash::Sha1); + hasher.addData(data); + dataID._data = hasher.result(); + } + else { + dataID._data = data; + } + + auto it = _hashes->left.find(&dataID); + if (it != _hashes->left.end()) { + return {it->first}; + } + + if (!hashed && !nocopy) { + // if not hashed, make a deep copy of the data + dataID._data = QByteArray(data.constData(), data.size()); + } + + StringID::Flags flags(StringID::Flag::None); + if (binary) { + flags.setFlag(StringID::Flag::Binary); + } + if (hashed) { + flags.setFlag(StringID::Flag::Hashed); + } + StringIDRef sid(new StringID(lastID() + 1, dataID._data, flags)); + return {insert(sid)}; } -size_t StringHasher::count() const { - size_t count = 0; - for (auto &v : _hashes->right) - if (v.second->getRefCount() > 1) - ++count; - return count; +StringIDRef StringHasher::getID(const Data::MappedName& name, const QVector& sids) +{ + StringID anID; + anID._postfix = name.postfixBytes(); + + Data::IndexedName indexed; + if (anID._postfix.size() == 0) { + indexed = Data::IndexedName(name.dataBytes()); + } + if (indexed) { + anID._data = + QByteArray::fromRawData(indexed.getType(), static_cast(strlen(indexed.getType()))); + } + else { + anID._data = name.dataBytes(); + } + + auto it = _hashes->left.find(&anID); + if (it != _hashes->left.end()) { + auto res = StringIDRef(it->first); + if (indexed) { + res._index = indexed.getIndex(); + } + return res; + } + + if (!indexed && name.isRaw()) { + anID._data = QByteArray(name.dataBytes().constData(), 
name.dataBytes().size()); + } + + StringIDRef postfixRef; + if ((anID._postfix.size() != 0) && anID._postfix.indexOf("#") < 0) { + postfixRef = getID(anID._postfix); + postfixRef.toBytes(anID._postfix); + } + + StringIDRef indexRef; + if (indexed) { + indexRef = getID(anID._data); + } + + StringIDRef sid(new StringID(lastID() + 1, anID._data)); + StringID& id = *sid._sid; + if (anID._postfix.size() != 0) { + id._flags.setFlag(StringID::Flag::Postfixed); + id._postfix = anID._postfix; + } + + int count = 0; + for (const auto& hasher : sids) { + if (hasher && hasher._sid->_hasher == this) { + ++count; + } + } + + int extra = (postfixRef ? 1 : 0) + (indexRef ? 1 : 0); + if (count == sids.size() && !postfixRef && !indexRef) { + id._sids = sids; + } + else { + id._sids.reserve(count + extra); + if (postfixRef) { + id._flags.setFlag(StringID::Flag::PostfixEncoded); + id._sids.push_back(postfixRef); + } + if (indexRef) { + id._flags.setFlag(StringID::Flag::Indexed); + id._sids.push_back(indexRef); + } + for (const auto& hasher : sids) { + if (hasher && hasher._sid->_hasher == this) { + id._sids.push_back(hasher); + } + } + } + if (id._sids.size() > 10) { + std::sort(id._sids.begin() + extra, id._sids.end()); + id._sids.erase(std::unique(id._sids.begin() + extra, id._sids.end()), id._sids.end()); + } + + if ((id._postfix.size() != 0) && !indexed) { + StringID::IndexID res = StringID::fromString(id._data); + if (res.id > 0) { + int offset = id.isPostfixEncoded() ? 
1 : 0; + for (int i = offset; i < id._sids.size(); ++i) { + if (id._sids[i].value() == res.id) { + if (i != offset) { + std::swap(id._sids[offset], id._sids[i]); + } + if (res.index != 0) { + id._flags.setFlag(StringID::Flag::PrefixIDIndex); + } + else { + id._flags.setFlag(StringID::Flag::PrefixID); + } + break; + } + } + } + } + + return {insert(sid), indexed.getIndex()}; } -void StringHasher::Restore(Base::XMLReader &reader) { - clear(); - reader.readElement("StringHasher"); - _hashes->SaveAll = reader.getAttributeAsInteger("saveall") ? true : false; - _hashes->Threshold = reader.getAttributeAsInteger("threshold"); - - bool newtag = false; - if (reader.getAttributeAsInteger("new", "0") > 0) { - reader.readElement("StringHasher2"); - newtag = true; - } - - if (reader.hasAttribute("file")) { - const char *file = reader.getAttribute("file"); - if(*file) - reader.addFile(file, this); - return; - } - - std::size_t count = reader.getAttributeAsUnsigned("count"); - if (newtag) { - restoreStreamNew(reader.beginCharStream(false), count); - reader.readEndElement("StringHasher2"); - return; - } - else if (count && reader.FileVersion > 1) - restoreStream(reader.beginCharStream(false), count); - else { - for (std::size_t i = 0; i < count; ++i) { - reader.readElement("Item"); - StringIDRef sid; - long id = reader.getAttributeAsInteger("id"); - bool hashed = reader.hasAttribute("hash"); - if (hashed || reader.hasAttribute("data")) { - const char *value = hashed ? 
reader.getAttribute("hash") : reader.getAttribute("data"); - sid = new StringID(id, QByteArray::fromBase64(value), StringID::Flag::Hashed); - } - else - sid = new StringID(id, QByteArray(reader.getAttribute("text"))); - insert(sid); - } - } - reader.readEndElement("StringHasher"); +StringIDRef StringHasher::getID(long id, int index) const +{ + if (id <= 0) { + return {}; + } + auto it = _hashes->right.find(id); + if (it == _hashes->right.end()) { + return {}; + } + StringIDRef res(it->second); + res._index = index; + return res; } -unsigned int StringHasher::getMemSize (void) const { - return (_hashes->SaveAll?size():count()) * 10; +void StringHasher::setPersistenceFileName(const char* filename) const +{ + if (!filename) { + filename = ""; + } + _filename = filename; } -PyObject *StringHasher::getPyObject() { - return new StringHasherPy(this); +const std::string& StringHasher::getPersistenceFileName() const +{ + return _filename; } -std::map StringHasher::getIDMap() const { - std::map ret; - for (auto &v : _hashes->right) - ret.emplace_hint(ret.end(), v.first, StringIDRef(v.second)); - return ret; +void StringHasher::Save(Base::Writer& writer) const +{ + + size_t count = 0; + if (_hashes->SaveAll) { + count = _hashes->size(); + } + else { + count = 0; + for (auto& hasher : _hashes->right) { + if (hasher.second->isMarked() || hasher.second->isPersistent()) { + ++count; + } + } + } + + writer.Stream() << writer.ind() << "SaveAll + << "\" threshold=\"" << _hashes->Threshold << "\""; + + if (count == 0U) { + writer.Stream() << " count=\"0\">\n"; + return; + } + + writer.Stream() << " count=\"0\" new=\"1\"/>\n"; + + writer.Stream() << writer.ind() << "\n"; + return; + } + + writer.Stream() << " count=\"" << count << "\">\n"; + saveStream(writer.beginCharStream() << '\n'); + writer.endCharStream() << '\n'; + writer.Stream() << writer.ind() << "\n"; +} + +void StringHasher::SaveDocFile(Base::Writer& writer) const +{ + std::size_t count = _hashes->SaveAll ? 
this->size() : this->count(); + writer.Stream() << "StringTableStart v1 " << count << '\n'; + saveStream(writer.Stream()); +} + +void StringHasher::saveStream(std::ostream& stream) const +{ + boost::io::ios_flags_saver ifs(stream); + stream << std::hex; + + long anchor = 0; + const StringID* last = nullptr; + long lastID = 0; + bool relative = false; + + for (auto& hasher : _hashes->right) { + auto& d = *hasher.second; + long id = d._id; + if (!_hashes->SaveAll && !d.isMarked() && !d.isPersistent()) { + continue; + } + + // We use relative coding to save space. But in order to have some + // minimum protection against corruption, write an absolute value every + // once a while. + relative = (id - anchor) < 1000; + if (relative) { + stream << '-' << id - lastID; + } + else { + anchor = id; + stream << id; + } + lastID = id; + + int offset = d.isPostfixEncoded() ? 1 : 0; + + StringID::IndexID prefixID {}; + prefixID.id = 0; + prefixID.index = 0; + if (d.isPrefixID()) { + assert(d._sids.size() > offset); + prefixID.id = d._sids[offset].value(); + } + else if (d.isPrefixIDIndex()) { + prefixID = StringID::fromString(d._data); + assert(d._sids.size() > offset && d._sids[offset].value() == prefixID.id); + } + + auto flags = d._flags; + flags.setFlag(StringID::Flag::Marked, false); + stream << '.' << flags.toUnderlyingType(); + + int position = 0; + if (!relative) { + for (; position < d._sids.size(); ++position) { + stream << '.' << d._sids[position].value(); + } + } + else { + if (last) { + for (; position < d._sids.size() && position < last->_sids.size(); ++position) { + long m = last->_sids[position].value(); + long n = d._sids[position].value(); + if (n < m) { + stream << ".-" << m - n; + } + else { + stream << '.' << n - m; + } + } + } + for (; position < d._sids.size(); ++position) { + stream << '.' 
<< id - d._sids[position].value(); + } + } + + last = &d; + + // Having postfix means it is a geometry element name, which + // guarantees to be a single line without space. So it is safe to + // store in raw stream. + if (d.isPostfixed()) { + if (d.isPrefixIDIndex()) { + stream << ' ' << prefixID.index; + } + else if (!d.isIndexed() && !d.isPrefixID()) { + stream << ' ' << d._data.constData(); + } + + if (!d.isPostfixEncoded()) { + stream << ' ' << d._postfix.constData(); + } + stream << '\n'; + } + else { + // Reaching here means the string may contain space and newlines + stream << ' '; + stream << std::dec << d._data.constData() << std::hex; + } + } +} + +void StringHasher::RestoreDocFile(Base::Reader& reader) +{ + std::string marker; + std::string ver; + reader >> marker; + std::size_t count = 0; + _hashes->clear(); + if (marker == "StringTableStart") { + reader >> ver >> count; + if (ver != "v1") { + FC_WARN("Unknown string table format"); + } + restoreStreamNew(reader, count); + return; + } + count = atoi(marker.c_str()); + restoreStream(reader, count); +} + +void StringHasher::restoreStreamNew(std::istream& stream, std::size_t count) +{ + _hashes->clear(); + std::string content; + boost::io::ios_flags_saver ifs(stream); + stream >> std::hex; + std::vector tokens; + long lastid = 0; + const StringID* last = nullptr; + + std::string tmp; + + for (uint32_t i = 0; i < count; ++i) { + if (!(stream >> tmp)) { + FC_THROWM(Base::RuntimeError, "Invalid string table"); + } + + tokens.clear(); + boost::split(tokens, tmp, boost::is_any_of(".")); + if (tokens.size() < 2) { + FC_THROWM(Base::RuntimeError, "Invalid string table"); + } + + long id = 0; + bool relative = false; + if (tokens[0][0] == '-') { + relative = true; + id = lastid + strtol(tokens[0].c_str() + 1, nullptr, 16); + } + else { + id = strtol(tokens[0].c_str(), nullptr, 16); + } + + lastid = id; + + unsigned long flag = strtol(tokens[1].c_str(), nullptr, 16); + StringIDRef sid(new StringID(id, 
QByteArray(), static_cast(flag))); + + StringID& d = *sid._sid; + d._sids.reserve(tokens.size() - 2); + + int j = 2; + if (relative && last) { + for (; j < (int)tokens.size() && j - 2 < last->_sids.size(); ++j) { + long m = last->_sids[j - 2].value(); + long n; + if (tokens[j][0] == '-') { + n = -strtol(&tokens[j][1], nullptr, 16); + } + else { + n = strtol(&tokens[j][0], nullptr, 16); + } + StringIDRef sid = getID(m + n); + if (!sid) { + FC_THROWM(Base::RuntimeError, "Invalid string id reference"); + } + d._sids.push_back(sid); + } + } + for (; j < (int)tokens.size(); ++j) { + long n = strtol(tokens[j].data(), nullptr, 16); + StringIDRef sid = getID(relative ? id - n : n); + if (!sid) { + FC_THROWM(Base::RuntimeError, "Invalid string id reference"); + } + d._sids.push_back(sid); + } + + if (!d.isPostfixed()) { + stream >> content; + if (d.isHashed() || d.isBinary()) { + d._data = QByteArray::fromBase64(content.c_str()); + } + else { + d._data = content.c_str(); + } + } + else { + int offset = 0; + if (d.isPostfixEncoded()) { + offset = 1; + if (d._sids.empty()) { + FC_THROWM(Base::RuntimeError, "Missing string postfix"); + } + d._postfix = d._sids[0]._sid->_data; + } + if (d.isIndexed()) { + if (d._sids.size() <= offset) { + FC_THROWM(Base::RuntimeError, "Missing string prefix"); + } + d._data = d._sids[offset]._sid->_data; + } + else if (d.isPrefixID() || d.isPrefixIDIndex()) { + if (d._sids.size() <= offset) { + FC_THROWM(Base::RuntimeError, "Missing string prefix id"); + } + int index = 0; + if (d.isPrefixIDIndex()) { + if (!(stream >> index)) { + FC_THROWM(Base::RuntimeError, "Missing string prefix index"); + } + } + d._data = d._sids[offset]._sid->toString(index).c_str(); + } + else { + stream >> content; + d._data = content.c_str(); + } + if (!d.isPostfixEncoded()) { + stream >> content; + d._postfix = content.c_str(); + } + } + + last = insert(sid); + } +} + +StringID* StringHasher::insert(const StringIDRef& sid) +{ + assert(sid && sid._sid->_hasher == 
nullptr); + auto& hasher = *sid._sid; + hasher._hasher = this; + hasher.ref(); + auto res = _hashes->right.insert(_hashes->right.end(), + HashMap::right_map::value_type(sid.value(), &hasher)); + if (res->second != &hasher) { + hasher._hasher = nullptr; + hasher.unref(); + } + return res->second; +} + +void StringHasher::restoreStream(std::istream& stream, std::size_t count) +{ + _hashes->clear(); + std::string content; + for (uint32_t i = 0; i < count; ++i) { + int32_t id = 0; + uint8_t type = 0; + stream >> id >> type >> content; + StringIDRef sid = new StringID(id, QByteArray(), static_cast(type)); + if (sid.isHashed() || sid.isBinary()) { + sid._sid->_data = QByteArray::fromBase64(content.c_str()); + } + else { + sid._sid->_data = QByteArray(content.c_str()); + } + insert(sid); + } +} + +void StringHasher::clear() +{ + for (auto& hasher : _hashes->right) { + hasher.second->_hasher = nullptr; + hasher.second->unref(); + } + _hashes->clear(); +} + +size_t StringHasher::size() const +{ + return _hashes->size(); +} + +size_t StringHasher::count() const +{ + size_t count = 0; + for (auto& hasher : _hashes->right) { + if (hasher.second->getRefCount() > 1) { + ++count; + } + } + return count; +} + +void StringHasher::Restore(Base::XMLReader& reader) +{ + clear(); + reader.readElement("StringHasher"); + _hashes->SaveAll = reader.getAttributeAsInteger("saveall") != 0L; + _hashes->Threshold = static_cast(reader.getAttributeAsInteger("threshold")); + + bool newTag = false; + if (reader.hasAttribute("new") && reader.getAttributeAsInteger("new") > 0) { + reader.readElement("StringHasher2"); + newTag = true; + } + + if (reader.hasAttribute("file")) { + const char* file = reader.getAttribute("file"); + if (*file != '\0') { + reader.addFile(file, this); + } + return; + } + + std::size_t count = reader.getAttributeAsUnsigned("count"); + if (newTag) { + restoreStreamNew(reader.beginCharStream(), count); + reader.readEndElement("StringHasher2"); + return; + } + if ((count != 0U) 
&& reader.FileVersion > 1) { + restoreStream(reader.beginCharStream(), count); + } + else { + for (std::size_t i = 0; i < count; ++i) { + reader.readElement("Item"); + StringIDRef sid; + long id = reader.getAttributeAsInteger("id"); + bool hashed = reader.hasAttribute("hash"); + if (hashed || reader.hasAttribute("data")) { + const char* value = + hashed ? reader.getAttribute("hash") : reader.getAttribute("data"); + sid = new StringID(id, QByteArray::fromBase64(value), StringID::Flag::Hashed); + } + else { + sid = new StringID(id, QByteArray(reader.getAttribute("text"))); + } + insert(sid); + } + } + reader.readEndElement("StringHasher"); +} + +unsigned int StringHasher::getMemSize() const +{ + return (_hashes->SaveAll ? size() : count()) * 10; +} + +PyObject* StringHasher::getPyObject() +{ + return new StringHasherPy(this); +} + +std::map StringHasher::getIDMap() const +{ + std::map ret; + for (auto& hasher : _hashes->right) { + ret.emplace_hint(ret.end(), hasher.first, StringIDRef(hasher.second)); + } + return ret; } void StringHasher::clearMarks() const { - for (auto &v : _hashes->right) - v.second->_flags.setFlag(StringID::Flag::Marked, false); + for (auto& hasher : _hashes->right) { + hasher.second->_flags.setFlag(StringID::Flag::Marked, false); + } } diff --git a/src/App/StringHasher.h b/src/App/StringHasher.h index 9c676c3076..f99d515a96 100644 --- a/src/App/StringHasher.h +++ b/src/App/StringHasher.h @@ -1,27 +1,27 @@ -/**************************************************************************** -* Copyright (c) 2022 Zheng Lei (realthunder) * -* * -* This file is part of the FreeCAD CAx development system. * -* * -* This library is free software; you can redistribute it and/or * -* modify it under the terms of the GNU Library General Public * -* License as published by the Free Software Foundation; either * -* version 2 of the License, or (at your option) any later version. 
* -* * -* This library is distributed in the hope that it will be useful, * -* but WITHOUT ANY WARRANTY; without even the implied warranty of * -* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * -* GNU Library General Public License for more details. * -* * -* You should have received a copy of the GNU Library General Public * -* License along with this library; see the file COPYING.LIB. If not, * -* write to the Free Software Foundation, Inc., 59 Temple Place, * -* Suite 330, Boston, MA 02111-1307, USA * -* * -****************************************************************************/ +// SPDX-License-Identifier: LGPL-2.1-or-later -#ifndef APP_STRINGID_H -#define APP_STRINGID_H +/*************************************************************************************************** + * * + * Copyright (c) 2022 Zheng, Lei (realthunder) * + * Copyright (c) 2023 FreeCAD Project Association * + * * + * This file is part of FreeCAD. * + * * + * FreeCAD is free software: you can redistribute it and/or modify it under the terms of the * + * GNU Lesser General Public License as published by the Free Software Foundation, either * + * version 2.1 of the License, or (at your option) any later version. * + * * + * FreeCAD is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; * + * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * + * See the GNU Lesser General Public License for more details. * + * * + * You should have received a copy of the GNU Lesser General Public License along with * + * FreeCAD. If not, see <https://www.gnu.org/licenses/>. 
* + * * + **************************************************************************************************/ + +#ifndef APP_STRING_ID_H +#define APP_STRING_ID_H #include #include @@ -33,648 +33,751 @@ #include #include #include +#include -namespace Data{ +namespace Data +{ class MappedName; } -namespace App { +namespace App +{ class StringHasher; class StringID; class StringIDRef; -typedef Base::Reference StringHasherRef; +using StringHasherRef = Base::Reference; /** Class to store a string -* -* The main purpose of this class is to provide an efficient storage of the -* mapped geometry element name (i.e. the new Topological Naming), but it can -* also be used as a general purpose string table. -* -* The StringID is to be stored in a string table (StringHasher), and be -* referred to by an integer ID. The stored data can be optionally divided into -* two parts, prefix and postfix. This is because a new mapped name is often -* created by adding some common postfix to an existing name, so data sharing -* can be improved using the following techniques: -* -* a) reference count (through QByteArray) the main data part, -* -* b) (recursively) encode prefix and/or postfix as an integer (in the -* format of #, e.g. #1b) that references another StringID, -* -* c) Check index based name in prefix, e.g. Edge1, Vertex2, and encode -* only the text part as StringID. The index is stored separately in -* reference class StringIDRef to maximize data sharing. -*/ -class AppExport StringID: public Base::BaseClass, public Base::Handled { - TYPESYSTEM_HEADER_WITH_OVERRIDE(); + * + * The main purpose of this class is to provide an efficient storage of the + * mapped geometry element name (i.e. the new Topological Naming), but it can + * also be used as a general purpose string table. + * + * The StringID is to be stored in a string table (StringHasher), and be + * referred to by an integer ID. The stored data can be optionally divided into + * two parts, prefix and postfix. 
This is because a new mapped name is often + * created by adding some common postfix to an existing name, so data sharing + * can be improved using the following techniques: + * + * a) reference count (through QByteArray) the main data part, + * + * b) (recursively) encode prefix and/or postfix as an integer (in the + * format of #, e.g. #1b) that references another StringID, + * + * c) Check index based name in prefix, e.g. Edge1, Vertex2, and encode + * only the text part as StringID. The index is stored separately in + * reference class StringIDRef to maximize data sharing. + */ +class AppExport StringID: public Base::BaseClass, public Base::Handled +{ + TYPESYSTEM_HEADER_WITH_OVERRIDE(); + public: - /// Flag of the stored string data - enum class Flag { - /// No flag - None = 0, - /// The stored data is binary - Binary = 1 << 0, - /// The stored data is the sha1 hash of the original content - Hashed = 1 << 1, - /** Postfix is encoded as #, e.g. #1b, where the hex integer part - * refers to another StringID. - */ - PostfixEncoded = 1 << 2, - /// The data is splited as prefix and postfix - Postfixed = 1 << 3, - /// The prefix data is split as text + index - Indexed = 1 << 4, - /** The prefix data is encoded as #, e.g. #1b, where the hex - * integer part refers to another StringID. - */ - PrefixID = 1 << 5, - /** The prefix split as text + index, where the text is encoded - * using another StringID. - */ - PrefixIDIndex = 1 << 6, - /// The string ID is persistent regardless of internal mark */ - Persistent = 1 << 7, - /// Internal marked used to check if the string ID is used - Marked = 1 << 8, - }; - typedef Base::Flags Flags; + /// Flag of the stored string data + enum class Flag + { + /// No flag + None = 0, + /// The stored data is binary + Binary = 1 << 0, + /// The stored data is the sha1 hash of the original content + Hashed = 1 << 1, + /** Postfix is encoded as #, e.g. #1b, where the hex integer part + * refers to another StringID. 
+ */ + PostfixEncoded = 1 << 2, + /// The data is split as prefix and postfix + Postfixed = 1 << 3, + /// The prefix data is split as text + index + Indexed = 1 << 4, + /** The prefix data is encoded as #, e.g. #1b, where the hex + * integer part refers to another StringID. + */ + PrefixID = 1 << 5, + /** The prefix split as text + index, where the text is encoded + * using another StringID. + */ + PrefixIDIndex = 1 << 6, + /// The string ID is persistent regardless of internal mark + Persistent = 1 << 7, + /// Internal marked used to check if the string ID is used + Marked = 1 << 8, + }; + using Flags = Base::Flags; - /** Constructor - * @param id: integer ID of this StringID - * @param data: input data - * @param flags: flags describes the data - * - * User code is not supposed to create StringID directly, but through StringHasher::getID() - */ - StringID(long id, const QByteArray &data, const Flags &flags=Flag::None) - :_id(id),_data(data),_flags(flags) - {} + /** Constructor + * @param id: integer ID of this StringID + * @param data: input data + * @param flags: flags describes the data + * + * User code is not supposed to create StringID directly, but through StringHasher::getID() + */ + StringID(long id, QByteArray data, const Flags& flags = Flag::None) + : _id(id), + _data(std::move(data)), + _flags(flags) + {} - /// Constructs an empty StringID - StringID() - :_id(0), _flags(Flag::None) - {} + /// Constructs an empty StringID + StringID() + : _id(0), + _flags(Flag::None) + {} - virtual ~StringID(); + ~StringID() override; - /// Returns the ID of this StringID - long value() const {return _id;} + /// Returns the ID of this StringID + long value() const + { + return _id; + } - /// Returns all related StringIDs that used to encode this StringID - const QVector &relatedIDs() const {return _sids;} + /// Returns all related StringIDs that used to encode this StringID + const QVector& relatedIDs() const + { + return _sids; + } - /// @name Flag accessors - //@{ - 
bool isBinary() const; - bool isHashed() const; - bool isPostfixed() const; - bool isPostfixEncoded() const; - bool isIndexed() const; - bool isPrefixID() const; - bool isPrefixIDIndex() const; - bool isMarked() const; - bool isPersistent() const; - //@} + /// @name Flag accessors + //@{ + bool isBinary() const; + bool isHashed() const; + bool isPostfixed() const; + bool isPostfixEncoded() const; + bool isIndexed() const; + bool isPrefixID() const; + bool isPrefixIDIndex() const; + bool isMarked() const; + bool isPersistent() const; + //@} - /// Checks if this StringID is from the input hasher - bool isFromSameHasher(const StringHasherRef & hasher) const - { - return this->_hasher == hasher; - } + /// Checks if this StringID is from the input hasher + bool isFromSameHasher(const StringHasherRef& hasher) const + { + return this->_hasher == hasher; + } - /// Returns the owner hasher - StringHasherRef getHasher() const - { - return StringHasherRef(_hasher); - } + /// Returns the owner hasher + StringHasherRef getHasher() const + { + return {_hasher}; + } - /// Returns the data (prefix) - const QByteArray data() const {return _data;} - /// Returns the postfix - const QByteArray postfix() const {return _postfix;} + /// Returns the data (prefix) + QByteArray data() const + { + return _data; + } + /// Returns the postfix + QByteArray postfix() const + { + return _postfix; + } - virtual PyObject *getPyObject() override; - /// Returns a Python tuple containing both the text and index - PyObject *getPyObjectWithIndex(int index); + PyObject* getPyObject() override; + /// Returns a Python tuple containing both the text and index + PyObject* getPyObjectWithIndex(int index); - /** Convert to string represtation of this StringID - * @param index: optional index - * - * The format is #. And if index is non zero, then #:. Both - * and are in hex format. 
- */ - std::string toString(int index) const; + /** Convert to string representation of this StringID + * @param index: optional index + * + * The format is #. And if index is non zero, then #:. Both + * and are in hex format. + */ + std::string toString(int index) const; - /// Light weight structure of holding a string ID and associated index - struct IndexID { - long id; - int index; + /// Light weight structure of holding a string ID and associated index + struct IndexID + { + long id; + int index; - explicit operator bool() const { - return id > 0; - } + explicit operator bool() const + { + return id > 0; + } - friend std::ostream & operator << (std::ostream &s, const IndexID & id) { - s << id.id; - if (id.index) - s << ':' << id.index; - return s; - } - }; + friend std::ostream& operator<<(std::ostream& stream, const IndexID& indexID) + { + stream << indexID.id; + if (indexID.index != 0) { + stream << ':' << indexID.index; + } + return stream; + } + }; - /** Parse string to get ID and index - * @param name: input string - * @param eof: Whether to check the end of string. If true, then the input - * string must contain only the string representation of this - * StringID - * @param size: input string size, or -1 if the input string is zero terminated. - * @return Return the integer ID and index. - * - * The input string is expected to be in the format of # or with index - * #:, where both id and index are in hex digits. - */ - static IndexID fromString(const char *name, bool eof=true, int size = -1); + /** Parse string to get ID and index + * @param name: input string + * @param eof: Whether to check the end of string. If true, then the input + * string must contain only the string representation of this + * StringID + * @param size: input string size, or -1 if the input string is zero terminated. + * @return Return the integer ID and index. 
+ * + * The input string is expected to be in the format of # or with index + * #:, where both id and index are in hex digits. + */ + static IndexID fromString(const char* name, bool eof = true, int size = -1); - /** Parse string to get ID and index - * @param bytes: input data - * @param eof: Whether to check the end of string. If true, then the input - * string must contain only the string representation of this - * StringID - * - * The input string is expected to be in the format of # or with index - * #:, where both id and index are in hex digits. - */ - static IndexID fromString(const QByteArray &bytes, bool eof=true) { - return fromString(bytes.constData(), eof, bytes.size()); - } + /** Parse string to get ID and index + * @param bytes: input data + * @param eof: Whether to check the end of string. If true, then the input + * string must contain only the string representation of this + * StringID + * + * The input string is expected to be in the format of # or with index + * #:, where both id and index are in hex digits. + */ + static IndexID fromString(const QByteArray& bytes, bool eof = true) + { + return fromString(bytes.constData(), eof, bytes.size()); + } - /** Get the text content of this StringID - * @param index: optional index - * @return Return the text content of this StringID. If the data is binary, - * then output in base64 encoded string. - */ - std::string dataToText(int index) const; + /** Get the text content of this StringID + * @param index: optional index + * @return Return the text content of this StringID. If the data is binary, + * then output in base64 encoded string. + */ + std::string dataToText(int index) const; - /** Get the content of this StringID as QByteArray - * @param bytes: output bytes - * @param index: opttional index. 
- */ - void toBytes(QByteArray &bytes, int index) const { - if (_postfix.size()) - bytes = _data + _postfix; - else if (index) - bytes = _data + QByteArray::number(index); - else - bytes = _data; - } + /** Get the content of this StringID as QByteArray + * @param bytes: output bytes + * @param index: optional index. + */ + void toBytes(QByteArray& bytes, int index) const + { + if (_postfix.size() != 0) { + bytes = _data + _postfix; + } + else if (index != 0) { + bytes = _data + QByteArray::number(index); + } + else { + bytes = _data; + } + } - /// Mark this StringID as used - void mark() const; + /// Mark this StringID as used + void mark() const; - /// Mark the StringID as persistent regardless of usage mark - void setPersistent(bool enable); + /// Mark the StringID as persistent regardless of usage mark + void setPersistent(bool enable); - bool operator<(const StringID &other) const { - return compare(other) < 0; - } + bool operator<(const StringID& other) const + { + return compare(other) < 0; + } - /** Compare StringID - * @param other: the other StringID for comparison - * @return Returns -1 if less than the other StringID, 1 if greater, or 0 if equal - */ - int compare(const StringID &other) const { - if (_hasher < other._hasher) - return -1; - if (_hasher > other._hasher) - return 1; - if (_id < other._id) - return -1; - if (_id > other._id) - return 1; - return 0; - } + /** Compare StringID + * @param other: the other StringID for comparison + * @return Returns -1 if less than the other StringID, 1 if greater, or 0 if equal + */ + int compare(const StringID& other) const + { + if (_hasher < other._hasher) { + return -1; + } + if (_hasher > other._hasher) { + return 1; + } + if (_id < other._id) { + return -1; + } + if (_id > other._id) { + return 1; + } + return 0; + } - friend class StringHasher; + friend class StringHasher; private: - long _id; - QByteArray _data; - QByteArray _postfix; - StringHasher *_hasher = nullptr; - mutable Flags _flags; - mutable 
QVector _sids; + long _id; + QByteArray _data; + QByteArray _postfix; + StringHasher* _hasher = nullptr; + mutable Flags _flags; + mutable QVector _sids; }; ////////////////////////////////////////////////////////////////////////// /** Counted reference to a StringID instance -*/ + */ class StringIDRef { public: - StringIDRef() - :_sid(nullptr), _index(0) - {} + StringIDRef() + : _sid(nullptr), + _index(0) + {} - StringIDRef(StringID* p, int index=0) - : _sid(p), _index(index) - { - if (_sid) - _sid->ref(); - } + StringIDRef(StringID* stringID, int index = 0) + : _sid(stringID), + _index(index) + { + if (_sid) { + _sid->ref(); + } + } - StringIDRef(const StringIDRef & other) - : _sid(other._sid) - , _index(other._index) - { - if (_sid) - _sid->ref(); - } + StringIDRef(const StringIDRef& other) + : _sid(other._sid), + _index(other._index) + { + if (_sid) { + _sid->ref(); + } + } - StringIDRef(StringIDRef && other) - : _sid(other._sid) - , _index(other._index) - { - other._sid = nullptr; - } + StringIDRef(StringIDRef&& other) noexcept + : _sid(other._sid), + _index(other._index) + { + other._sid = nullptr; + } - StringIDRef(const StringIDRef & other, int index) - : _sid(other._sid) - , _index(index) - { - if (_sid) - _sid->ref(); - } + StringIDRef(const StringIDRef& other, int index) + : _sid(other._sid), + _index(index) + { + if (_sid) { + _sid->ref(); + } + } - ~StringIDRef() - { - if (_sid) - _sid->unref(); - } + ~StringIDRef() + { + if (_sid) { + _sid->unref(); + } + } - void reset(const StringIDRef & p = StringIDRef()) { - *this = p; - } + void reset(const StringIDRef& stringID = StringIDRef()) + { + *this = stringID; + } - void reset(const StringIDRef &p, int index) { - *this = p; - this->_index = index; - } + void reset(const StringIDRef& stringID, int index) + { + *this = stringID; + this->_index = index; + } - void swap(StringIDRef &p) { - if(*this != p) { - auto tmp = p; - p = *this; - *this = tmp; - } - } + void swap(StringIDRef& stringID) + { + if (*this 
!= stringID) { + auto tmp = stringID; + stringID = *this; + *this = tmp; + } + } - StringIDRef & operator=(StringID* p) { - if (_sid == p) - return *this; - if (_sid) - _sid->unref(); - _sid = p; - if (_sid) - _sid->ref(); - this->_index = 0; - return *this; - } + StringIDRef& operator=(StringID* stringID) + { + if (_sid == stringID) { + return *this; + } + if (_sid) { + _sid->unref(); + } + _sid = stringID; + if (_sid) { + _sid->ref(); + } + this->_index = 0; + return *this; + } - StringIDRef & operator=(const StringIDRef & p) { - if (_sid != p._sid) { - if (_sid) - _sid->unref(); - _sid = p._sid; - if (_sid) - _sid->ref(); - } - this->_index = p._index; - return *this; - } + StringIDRef& operator=(const StringIDRef& stringID) + { + if (&stringID == this) { + return *this; + } + if (_sid != stringID._sid) { + if (_sid) { + _sid->unref(); + } + _sid = stringID._sid; + if (_sid) { + _sid->ref(); + } + } + this->_index = stringID._index; + return *this; + } - StringIDRef & operator=(StringIDRef && p) { - if (_sid != p._sid) { - if (_sid) - _sid->unref(); - _sid = p._sid; - p._sid = nullptr; - } - this->_index = p._index; - return *this; - } + StringIDRef& operator=(StringIDRef&& stringID) noexcept + { + if (_sid != stringID._sid) { + if (_sid) { + _sid->unref(); + } + _sid = stringID._sid; + stringID._sid = nullptr; + } + this->_index = stringID._index; + return *this; + } - bool operator<(const StringIDRef & p) const { - if (!_sid) - return true; - if (!p._sid) - return false; - int res = _sid->compare(*p._sid); - if (res < 0) - return true; - if (res > 0) - return false; - return _index < p._index; - } + bool operator<(const StringIDRef& stringID) const + { + if (!_sid) { + return true; + } + if (!stringID._sid) { + return false; + } + int res = _sid->compare(*stringID._sid); + if (res < 0) { + return true; + } + if (res > 0) { + return false; + } + return _index < stringID._index; + } - bool operator==(const StringIDRef & p) const { - return _sid == p._sid && 
_index == p._index; - } + bool operator==(const StringIDRef& stringID) const + { + return _sid == stringID._sid && _index == stringID._index; + } - bool operator!=(const StringIDRef & p) const { - return _sid != p._sid || _index != p._index; - } + bool operator!=(const StringIDRef& stringID) const + { + return _sid != stringID._sid || _index != stringID._index; + } - explicit operator bool() const { - return _sid != nullptr; - } + explicit operator bool() const + { + return _sid != nullptr; + } - int getRefCount(void) const { - if (_sid) - return _sid->getRefCount(); - return 0; - } + int getRefCount() const + { + if (_sid) { + return _sid->getRefCount(); + } + return 0; + } - std::string toString() const { - if (_sid) - return _sid->toString(_index); - return std::string(); - } + std::string toString() const + { + if (_sid) { + return _sid->toString(_index); + } + return {}; + } - std::string dataToText() const { - if (_sid) - return _sid->dataToText(_index); - return std::string(); - } + std::string dataToText() const + { + if (_sid) { + return _sid->dataToText(_index); + } + return {}; + } - const char * constData() const { - if (_sid) { - assert(_index == 0); - assert(_sid->postfix().isEmpty()); - return _sid->data().constData(); - } - return ""; - } + const char* constData() const + { + if (_sid) { + assert(_index == 0); + assert(_sid->postfix().isEmpty()); + return _sid->data().constData(); + } + return ""; + } - const StringID & deref() const { - return *_sid; - } + const StringID& deref() const + { + return *_sid; + } - long value() const { - if (_sid) - return _sid->value(); - return 0; - } + long value() const + { + if (_sid) { + return _sid->value(); + } + return 0; + } - QVector relatedIDs() const { - if (_sid) - return _sid->relatedIDs(); - return QVector(); - } + QVector relatedIDs() const + { + if (_sid) { + return _sid->relatedIDs(); + } + return {}; + } - bool isBinary() const { - if (_sid) - return _sid->isBinary(); - return false; - } + bool 
isBinary() const + { + if (_sid) { + return _sid->isBinary(); + } + return false; + } - bool isHashed() const { - if (_sid) - return _sid->isHashed(); - return false; - } + bool isHashed() const + { + if (_sid) { + return _sid->isHashed(); + } + return false; + } - void toBytes(QByteArray &bytes) const { - if (_sid) - _sid->toBytes(bytes, _index); - } + void toBytes(QByteArray& bytes) const + { + if (_sid) { + _sid->toBytes(bytes, _index); + } + } - PyObject *getPyObject(void) { - if (_sid) - return _sid->getPyObjectWithIndex(_index); - Py_INCREF(Py_None); - return Py_None; - } + PyObject* getPyObject() + { + if (_sid) { + return _sid->getPyObjectWithIndex(_index); + } + Py_INCREF(Py_None); + return Py_None; + } - void mark() const { - if (_sid) - _sid->mark(); - } + void mark() const + { + if (_sid) { + _sid->mark(); + } + } - bool isMarked() const { - return _sid && _sid->isMarked(); - } + bool isMarked() const + { + return _sid && _sid->isMarked(); + } - bool isFromSameHasher(const StringHasherRef & hasher) const - { - return _sid && _sid->isFromSameHasher(hasher); - } + bool isFromSameHasher(const StringHasherRef& hasher) const + { + return _sid && _sid->isFromSameHasher(hasher); + } - StringHasherRef getHasher() const - { - if (_sid) - return _sid->getHasher(); - return StringHasherRef(); - } + StringHasherRef getHasher() const + { + if (_sid) { + return _sid->getHasher(); + } + return {}; + } - void setPersistent(bool enable) - { - if (_sid) - _sid->setPersistent(enable); - } + void setPersistent(bool enable) + { + if (_sid) { + _sid->setPersistent(enable); + } + } - friend class StringHasher; + friend class StringHasher; private: - StringID *_sid; - int _index; + StringID* _sid; + int _index; }; /// A String table to map string from/to a unique integer -class AppExport StringHasher: public Base::Persistence, public Base::Handled { +class AppExport StringHasher: public Base::Persistence, public Base::Handled +{ - TYPESYSTEM_HEADER_WITH_OVERRIDE(); + 
TYPESYSTEM_HEADER_WITH_OVERRIDE(); public: - StringHasher(); - virtual ~StringHasher(); + StringHasher(); + ~StringHasher() override; - virtual unsigned int getMemSize (void) const override; - virtual void Save (Base::Writer &/*writer*/) const override; - virtual void Restore(Base::XMLReader &/*reader*/) override; - virtual void SaveDocFile (Base::Writer &/*writer*/) const override; - virtual void RestoreDocFile (Base::Reader &/*reader*/) override; - void setPersistenceFileName(const char *name) const; - const std::string &getPersistenceFileName() const; + unsigned int getMemSize() const override; + void Save(Base::Writer& /*writer*/) const override; + void Restore(Base::XMLReader& /*reader*/) override; + void SaveDocFile(Base::Writer& /*writer*/) const override; + void RestoreDocFile(Base::Reader& /*reader*/) override; + void setPersistenceFileName(const char* name) const; + const std::string& getPersistenceFileName() const; - /** Maps an arbitrary string to an integer - * - * @param text: input string. - * @param len: length of the string, or -1 if the string is 0 terminated. - * @param hashable: whether the string is hashable. - * @return Return a shared pointer to the internally stored StringID. - * - * The function maps an arbitrary text string to a unique integer ID, which - * is returned as a shared pointer to reference count the ID so that it is - * possible to prune any unused strings. - * - * If \c hashable is true and the string is longer than the threshold - * setting of this StringHasher, it will be sha1 hashed before storing, and - * the original content of the string is discarded. If else, the string is - * copied and stored inside a StringID instance. - * - * The purpose of function is to provide a short form of a stable string - * identification. - */ - StringIDRef getID(const char *text, int len=-1, bool hashable=false); + /** Maps an arbitrary string to an integer + * + * @param text: input string. 
+ * @param len: length of the string, or -1 if the string is 0 terminated. + * @param hashable: whether the string is hashable. + * @return Return a shared pointer to the internally stored StringID. + * + * The function maps an arbitrary text string to a unique integer ID, which + * is returned as a shared pointer to reference count the ID so that it is + * possible to prune any unused strings. + * + * If \c hashable is true and the string is longer than the threshold + * setting of this StringHasher, it will be sha1 hashed before storing, and + * the original content of the string is discarded. If else, the string is + * copied and stored inside a StringID instance. + * + * The purpose of function is to provide a short form of a stable string + * identification. + */ + StringIDRef getID(const char* text, int len = -1, bool hashable = false); - /// Option for string string data - enum class Option { - /// No option - None = 0, - /// The input data is binary - Binary = 1 << 0, - /** The input data is hashable. If the data length is longer than the - * threshold setting of the StringHasher, it will be sha1 hashed before - * storing, and the original content of the string is discarded. - */ - Hashable = 1 << 1, - /// Do not copy the data, assuming the data is constant. If this option - //is not set, the data will be copied before storing. - NoCopy = 1 << 2, - }; - typedef Base::Flags