Merge pull request #9148 from chennes/toponamingStringHasher

App/Toponaming: String hasher
This commit is contained in:
Chris Hennes
2023-05-15 20:14:43 -05:00
committed by GitHub
19 changed files with 4227 additions and 4 deletions

View File

@@ -90,6 +90,8 @@ generate_from_xml(GeoFeatureGroupExtensionPy)
generate_from_xml(MetadataPy)
generate_from_xml(OriginGroupExtensionPy)
generate_from_xml(PartPy)
generate_from_xml(StringHasherPy)
generate_from_xml(StringIDPy)
generate_from_xml(ComplexGeoDataPy)
generate_from_xml(PropertyContainerPy)
@@ -115,6 +117,8 @@ SET(FreeCADApp_XML_SRCS
PropertyContainerPy.xml
ComplexGeoDataPy.xml
MaterialPy.xml
StringHasherPy.xml
StringIDPy.xml
)
SOURCE_GROUP("XML" FILES ${FreeCADApp_XML_SRCS})
@@ -270,6 +274,9 @@ SET(FreeCADApp_CPP_SRCS
MaterialPyImp.cpp
Metadata.cpp
MetadataPyImp.cpp
StringHasher.cpp
StringHasherPyImp.cpp
StringIDPyImp.cpp
)
SET(FreeCADApp_HPP_SRCS
@@ -288,6 +295,7 @@ SET(FreeCADApp_HPP_SRCS
MappedElement.h
Material.h
Metadata.h
StringHasher.h
)
SET(FreeCADApp_SRCS

869
src/App/StringHasher.cpp Normal file
View File

@@ -0,0 +1,869 @@
// SPDX-License-Identifier: LGPL-2.1-or-later
/***************************************************************************************************
* *
* Copyright (c) 2022 Zheng, Lei (realthunder) <realthunder.dev@gmail.com> *
* Copyright (c) 2023 FreeCAD Project Association *
* *
* This file is part of FreeCAD. *
* *
* FreeCAD is free software: you can redistribute it and/or modify it under the terms of the *
* GNU Lesser General Public License as published by the Free Software Foundation, either *
* version 2.1 of the License, or (at your option) any later version. *
* *
* FreeCAD is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; *
* without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. *
* See the GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License along with *
* FreeCAD. If not, see <https://www.gnu.org/licenses/>. *
* *
**************************************************************************************************/
#include "PreCompiled.h"
#include <QCryptographicHash>
#include <QHash>
#include <deque>
#include <Base/Console.h>
#include <Base/Reader.h>
#include <Base/Stream.h>
#include <Base/Writer.h>
#include <boost/algorithm/string/classification.hpp>
#include <boost/algorithm/string/split.hpp>
#include <boost/bimap.hpp>
#include <boost/bimap/set_of.hpp>
#include <boost/bimap/unordered_set_of.hpp>
#include <boost/iostreams/stream.hpp>
#include "MappedElement.h"
#include "StringHasher.h"
#include "StringHasherPy.h"
#include "StringIDPy.h"
FC_LOG_LEVEL_INIT("App", true, true)
namespace bio = boost::iostreams;
using namespace App;
///////////////////////////////////////////////////////////
/// Hash and equality functor for StringID pointers, keyed on the (data, postfix) pair.
///
/// The same type serves as both the hasher and the equality predicate of the unordered side of
/// the hash table, so it provides two call operators that differ in arity.
struct StringIDHasher
{
    /// Hash a StringID: the hash of the postfix is used as the seed when hashing the data.
    /// A null pointer hashes to zero.
    std::size_t operator()(const StringID* sid) const
    {
        if (sid == nullptr) {
            return 0;
        }
        const auto seed = qHash(sid->postfix());
        return qHash(sid->data(), seed);
    }

    /// Two StringIDs are equal if they are the same object, or if both their data and their
    /// postfix compare equal. A null pointer is only equal to another null pointer.
    bool operator()(const StringID* IDa, const StringID* IDb) const
    {
        if (IDa == IDb) {
            return true;
        }
        if (IDa == nullptr || IDb == nullptr) {
            return false;
        }
        if (IDa->data() != IDb->data()) {
            return false;
        }
        return IDa->postfix() == IDb->postfix();
    }
};
/// Bidirectional table of StringIDs: the left view finds an entry by content (unordered, using
/// StringIDHasher for hashing and equality), the right view finds it by integer ID (ordered).
using HashMapBase =
    boost::bimap<boost::bimaps::unordered_set_of<StringID*, StringIDHasher, StringIDHasher>,
                 boost::bimaps::set_of<long>>;

/// The hasher's table, together with the two settings that are stored alongside it.
class StringHasher::HashMap: public HashMapBase
{
public:
    /// When false, only marked or persistent entries are written by saveStream()
    bool SaveAll {false};
    /// Hashable data longer than this is stored as its SHA1 digest; a value <= 0 disables hashing
    int Threshold {0};
};
///////////////////////////////////////////////////////////
TYPESYSTEM_SOURCE_ABSTRACT(App::StringID, Base::BaseClass)
/// Destructor: if this StringID is still attached to a hasher, remove its entry (looked up by
/// integer ID) from that hasher's table.
StringID::~StringID()
{
    if (_hasher != nullptr) {
        auto& byID = _hasher->_hashes->right;
        byID.erase(_id);
    }
}
/// Create a new Python wrapper (StringIDPy) around this StringID.
PyObject* StringID::getPyObject()
{
    auto* wrapper = new StringIDPy(this);
    return wrapper;
}
/// Create a new Python wrapper (StringIDPy) around this StringID and store \a index in it.
PyObject* StringID::getPyObjectWithIndex(int index)
{
    auto* wrapper = new StringIDPy(this);
    wrapper->_index = index;
    return wrapper;
}
/// Format this StringID as "#<id>", or "#<id>:<index>" when \a index is non-zero. Both numbers
/// are written in hexadecimal (the hex manipulator stays in effect for the index as well).
std::string StringID::toString(int index) const
{
    std::ostringstream text;
    text << '#';
    text << std::hex << value();
    if (index == 0) {
        return text.str();
    }
    text << ':' << index;
    return text.str();
}
/// Parse text of the form "#<id>" or "#<id>:<index>" (hexadecimal numbers).
///
/// @param name: text to parse; a null pointer yields an id of -1
/// @param eof: if true, parsing must reach the end of the input, otherwise the id is set to -1
/// @param size: number of characters to read, or a negative value to use strlen(name)
/// @return the parsed id and index; the id is -1 if the text does not start with '#', if the
///         character following the id is present but is not ':', or if \a eof is requested and
///         the end of input was not reached.
StringID::IndexID StringID::fromString(const char* name, bool eof, int size)
{
    IndexID parsed {};// id and index both start out as zero
    if (name == nullptr) {
        parsed.id = -1;
        return parsed;
    }

    const int length = size < 0 ? static_cast<int>(std::strlen(name)) : size;
    bio::stream<bio::array_source> input(name, length);

    // The separators stay 0 if the stream runs out before they can be read
    char hashMark = 0;
    char colon = 0;
    input >> hashMark >> std::hex >> parsed.id >> colon >> parsed.index;

    const bool trailingInput = eof && !input.eof();
    const bool badPrefix = hashMark != '#';
    const bool badSeparator = colon != 0 && colon != ':';
    if (trailingInput || badPrefix || badSeparator) {
        parsed.id = -1;
    }
    return parsed;
}
/// Return the content of this StringID as text.
///
/// Hashed or binary data is returned base64 encoded, without index or postfix. Otherwise the
/// result is the data, followed by the decimal \a index (when non-zero), followed by the postfix
/// (when there is one).
std::string StringID::dataToText(int index) const
{
    const bool encoded = isHashed() || isBinary();
    if (encoded) {
        return _data.toBase64().constData();
    }

    std::string text(_data.constData());
    if (index != 0) {
        text.append(std::to_string(index));
    }
    if (!_postfix.isEmpty()) {
        text.append(_postfix.constData());
    }
    return text;
}
void StringID::mark() const
{
if (isMarked()) {
return;
}
_flags.setFlag(Flag::Marked);
for (auto& sid : _sids) {
sid.deref().mark();
}
}
///////////////////////////////////////////////////////////
TYPESYSTEM_SOURCE(App::StringHasher, Base::Persistence)
/// Construct a hasher with an empty table (SaveAll off, Threshold zero).
StringHasher::StringHasher()
    : _hashes(new HashMap())
{}
/// Destructor: detaches every stored StringID from this hasher and releases the hasher's
/// reference on it (see clear()).
StringHasher::~StringHasher()
{
    this->clear();
}
/// Change the SaveAll setting. When the value actually changes, compact() is run afterwards
/// (which is a no-op if SaveAll has just been switched on).
void StringHasher::setSaveAll(bool enable)
{
    if (_hashes->SaveAll != enable) {
        _hashes->SaveAll = enable;
        compact();
    }
}
void StringHasher::compact()
{
if (_hashes->SaveAll) {
return;
}
// Make a list of all the table entries that have only a single reference and are not marked
// "persistent"
std::deque<StringIDRef> pendings;
for (auto& hasher : _hashes->right) {
if (!hasher.second->isPersistent() && hasher.second->getRefCount() == 1) {
pendings.emplace_back(hasher.second);
}
}
// Recursively remove the unused StringIDs
while (!pendings.empty()) {
StringIDRef sid = pendings.front();
pendings.pop_front();
// Try to erase the map entry for this StringID
if (_hashes->right.erase(sid.value()) == 0U) {
continue;// If nothing was erased, there's nothing more to do
}
sid._sid->_hasher = nullptr;
sid._sid->unref();
for (auto& hasher : sid._sid->_sids) {
if (hasher._sid->_hasher == this && !hasher._sid->isPersistent()
&& hasher._sid->getRefCount() == 2) {
// If the related StringID also uses this hasher, is not marked persistent, and has
// a current reference count of 2 (which will be its hasher reference and its entry
// in the related SIDs list), then prep it for removal as well.
pendings.push_back(hasher);
}
}
}
}
/// Return the current SaveAll setting.
bool StringHasher::getSaveAll() const
{
    const bool saveAll = _hashes->SaveAll;
    return saveAll;
}
/// Set the size threshold above which hashable data is stored as its SHA1 digest (see getID()).
void StringHasher::setThreshold(int threshold)
{
    auto& table = *_hashes;
    table.Threshold = threshold;
}
/// Return the current hashing threshold.
int StringHasher::getThreshold() const
{
    const int threshold = _hashes->Threshold;
    return threshold;
}
/// Return the largest integer ID currently in the table, or 0 if the table is empty.
long StringHasher::lastID() const
{
    auto& byID = _hashes->right;
    if (byID.empty()) {
        return 0;
    }
    // The right view is ordered by ID, so the last element carries the largest one
    auto last = byID.end();
    --last;
    return last->first;
}
/// Look up or create the StringID for a character buffer.
///
/// @param text: the text to look up
/// @param len: number of characters in \a text, or a negative value to use strlen(text)
/// @param hashable: whether the Hashable option is passed on to the QByteArray overload
///
/// The buffer is wrapped without copying for the lookup; the QByteArray overload makes its own
/// copy when a new, unhashed entry has to be stored (the NoCopy option is not set here).
StringIDRef StringHasher::getID(const char* text, int len, bool hashable)
{
    const int length = len < 0 ? static_cast<int>(strlen(text)) : len;
    const Options options = hashable ? Option::Hashable : Option::None;
    return getID(QByteArray::fromRawData(text, length), options);
}
/// Look up or create the StringID for a byte array.
///
/// @param data: the content to look up
/// @param options: Binary marks the new entry as binary; Hashable allows the content to be
///        replaced by its SHA1 digest when it is longer than the (positive) threshold; NoCopy
///        stores \a data as given instead of making a deep copy.
/// @return a reference to the existing entry with the same content, or to a newly inserted one
///         whose ID is lastID() + 1.
StringIDRef StringHasher::getID(const QByteArray& data, Options options)
{
    const bool binary = options.testFlag(Option::Binary);
    const bool hashable = options.testFlag(Option::Hashable);
    const bool nocopy = options.testFlag(Option::NoCopy);
    const int threshold = _hashes->Threshold;
    const bool hashed = hashable && threshold > 0 && static_cast<int>(data.size()) > threshold;

    // Temporary key used only to search the table by content
    StringID key;
    if (hashed) {
        QCryptographicHash sha1(QCryptographicHash::Sha1);
        sha1.addData(data);
        key._data = sha1.result();
    }
    else {
        key._data = data;
    }

    const auto found = _hashes->left.find(&key);
    if (found != _hashes->left.end()) {
        return {found->first};
    }

    if (!(hashed || nocopy)) {
        // The content itself gets stored: make a deep copy of the caller's data
        key._data = QByteArray(data.constData(), data.size());
    }

    StringID::Flags flags(StringID::Flag::None);
    if (binary) {
        flags.setFlag(StringID::Flag::Binary);
    }
    if (hashed) {
        flags.setFlag(StringID::Flag::Hashed);
    }

    StringIDRef created(new StringID(lastID() + 1, key._data, flags));
    return {insert(created)};
}
/** Look up or create the StringID for a mapped element name
 * @param name: the mapped name; its dataBytes() become the data (prefix) and its postfixBytes()
 *              become the postfix of the StringID
 * @param sids: StringIDs related to \a name. Only the ones that belong to this hasher are
 *              recorded in the new StringID.
 * @return Returns a reference to the matching entry if one already exists, otherwise to a newly
 *         inserted one. The index of the returned reference is set when the data part of the
 *         name was split into text + index.
 *
 * When a new entry is created, the postfix (unless it already contains a '#') and the text part
 * of an indexed name are themselves stored through getID(), and are placed at the front of the
 * new entry's related-ID list.
 */
StringIDRef StringHasher::getID(const Data::MappedName& name, const QVector<StringIDRef>& sids)
{
// Temporary key, used only for the table lookup and as scratch storage
StringID tempID;
tempID._postfix = name.postfixBytes();
Data::IndexedName indexed;
if (tempID._postfix.size() != 0) {
// Only check for IndexedName if there is postfix, because of the way
// we restore the StringID. See StringHasher::saveStream/restoreStreamNew()
indexed = Data::IndexedName(name.dataBytes());
}
if (indexed) {
// If this is an IndexedName, then _data only stores the base part of the name, without the
// integer index
tempID._data =
QByteArray::fromRawData(indexed.getType(), static_cast<int>(strlen(indexed.getType())));
}
else {
// Store the entire name in _data, but temporarily re-use the existing memory
tempID._data = name.dataBytes();
}
// Check to see if there is already an entry in the hash table for this StringID
auto it = _hashes->left.find(&tempID);
if (it != _hashes->left.end()) {
auto res = StringIDRef(it->first);
if (indexed) {
res._index = indexed.getIndex();
}
return res;
}
if (!indexed && name.isRaw()) {
// Make a copy of the memory if we didn't do so earlier
tempID._data = QByteArray(name.dataBytes().constData(), name.dataBytes().size());
}
// If the postfix is not already encoded, use getID to encode it:
StringIDRef postfixRef;
if ((tempID._postfix.size() != 0) && tempID._postfix.indexOf("#") < 0) {
postfixRef = getID(tempID._postfix);
postfixRef.toBytes(tempID._postfix);
}
// If _data is an IndexedName, use getID to encode it:
StringIDRef indexRef;
if (indexed) {
indexRef = getID(tempID._data);
}
// The real StringID object that we are going to insert
StringIDRef newStringIDRef(new StringID(lastID() + 1, tempID._data));
StringID& newStringID = *newStringIDRef._sid;
if (tempID._postfix.size() != 0) {
newStringID._flags.setFlag(StringID::Flag::Postfixed);
newStringID._postfix = tempID._postfix;
}
// Count the related SIDs that use this hasher
int numSIDs = 0;
for (const auto& relatedID : sids) {
if (relatedID && relatedID._sid->_hasher == this) {
++numSIDs;
}
}
// Number of SIDs created above (postfix and/or index text) that go in front of the list
int numAddedSIDs = (postfixRef ? 1 : 0) + (indexRef ? 1 : 0);
if (numSIDs == sids.size() && !postfixRef && !indexRef) {
// The simplest case: just copy the whole list
newStringID._sids = sids;
}
else {
// Put the added SIDs at the front of the SID list
newStringID._sids.reserve(numSIDs + numAddedSIDs);
if (postfixRef) {
newStringID._flags.setFlag(StringID::Flag::PostfixEncoded);
newStringID._sids.push_back(postfixRef);
}
if (indexRef) {
newStringID._flags.setFlag(StringID::Flag::Indexed);
newStringID._sids.push_back(indexRef);
}
// Append the sids from the input list whose hasher is this one
for (const auto& relatedID : sids) {
if (relatedID && relatedID._sid->_hasher == this) {
newStringID._sids.push_back(relatedID);
}
}
}
// If the number of related IDs is larger than some threshold (hardcoded to 10 right now), then
// remove any duplicates (ignoring the new SIDs we may have just added)
const int relatedIDSizeThreshold {10};
if (newStringID._sids.size() > relatedIDSizeThreshold) {
std::sort(newStringID._sids.begin() + numAddedSIDs, newStringID._sids.end());
newStringID._sids.erase(
std::unique(newStringID._sids.begin() + numAddedSIDs, newStringID._sids.end()),
newStringID._sids.end());
}
// If the new StringID has a postfix, but is not indexed, see if the data string itself
// contains an index.
if ((newStringID._postfix.size() != 0) && !indexed) {
// Use the fromString function to parse the new StringID's data field for a possible index
StringID::IndexID res = StringID::fromString(newStringID._data);
if (res.id > 0) {// If the data had an index
if (res.index != 0) {
indexed.setIndex(res.index);
// Drop the index digits from the stored data, keeping everything up to and including
// the ':' separator
newStringID._data.resize(newStringID._data.lastIndexOf(':')+1);
}
// The prefix SID, if present, sits right after the encoded postfix SID (if any)
int offset = newStringID.isPostfixEncoded() ? 1 : 0;
// Search for the SID with that index
for (int i = offset; i < newStringID._sids.size(); ++i) {
if (newStringID._sids[i].value() == res.id) {
if (i != offset) {
// If this SID is not already the first element in sids, move it there by
// swapping it with whatever WAS there
std::swap(newStringID._sids[offset], newStringID._sids[i]);
}
if (res.index != 0) {
newStringID._flags.setFlag(StringID::Flag::PrefixIDIndex);
}
else {
newStringID._flags.setFlag(StringID::Flag::PrefixID);
}
break;
}
}
}
}
// insert() returns the entry that is actually in the table, which may be a pre-existing one
return {insert(newStringIDRef), indexed.getIndex()};
}
/// Find a StringID by its integer ID.
///
/// @param id: the ID to look up; values <= 0 are never found
/// @param index: index to store in the returned reference
/// @return a reference to the entry carrying \a index, or an empty reference if there is no
///         entry with that ID.
StringIDRef StringHasher::getID(long id, int index) const
{
    if (id > 0) {
        auto found = _hashes->right.find(id);
        if (found != _hashes->right.end()) {
            return StringIDRef(found->second, index);
        }
    }
    return {};
}
/// Set the base name of the separate file used by Save(). A null pointer is treated as an empty
/// name, which makes Save() write the table inline instead.
void StringHasher::setPersistenceFileName(const char* filename) const
{
    _filename = (filename != nullptr) ? filename : "";
}
/// Return the base name set through setPersistenceFileName() (empty if none was set).
const std::string& StringHasher::getPersistenceFileName() const
{
    const std::string& name = _filename;
    return name;
}
/// Write the hasher to the document XML.
///
/// Always writes a <StringHasher> element carrying the saveall and threshold settings. If there
/// is nothing to save, that element is written with count="0" and closed. Otherwise it is
/// followed by a <StringHasher2> element that either names a separate file (when a persistence
/// file name is set; the file content is produced later by SaveDocFile()) or contains the string
/// table inline.
void StringHasher::Save(Base::Writer& writer) const
{
    // Number of entries that saveStream() is going to write
    size_t count = 0;
    if (_hashes->SaveAll) {
        count = _hashes->size();
    }
    else {
        for (auto& entry : _hashes->right) {
            const bool wanted = entry.second->isMarked() || entry.second->isPersistent();
            if (wanted) {
                ++count;
            }
        }
    }

    writer.Stream() << writer.ind() << "<StringHasher saveall=\"" << _hashes->SaveAll;
    writer.Stream() << "\" threshold=\"" << _hashes->Threshold << "\"";

    const bool nothingToSave = (count == 0U);
    if (nothingToSave) {
        writer.Stream() << " count=\"0\"></StringHasher>\n";
        return;
    }

    // The legacy element is left empty; new="1" tells Restore() to read <StringHasher2> next
    writer.Stream() << " count=\"0\" new=\"1\"/>\n";
    writer.Stream() << writer.ind() << "<StringHasher2 ";

    const bool separateFile = !_filename.empty();
    if (separateFile) {
        writer.Stream() << " file=\"" << writer.addFile((_filename + ".txt").c_str(), this)
                        << "\"/>\n";
        return;
    }

    writer.Stream() << " count=\"" << count << "\">\n";
    saveStream(writer.beginCharStream() << '\n');
    writer.endCharStream() << '\n';
    writer.Stream() << writer.ind() << "</StringHasher2>\n";
}
/// Write the string table to its own file: a "StringTableStart v1 <count>" header line followed
/// by the records produced by saveStream().
///
/// The count in the header must equal the number of records that saveStream() writes, because
/// restoreStreamNew() reads exactly that many. saveStream() writes every entry when SaveAll is
/// set, and otherwise only the entries that are marked or persistent, so the count is computed
/// with that same rule (as Save() does) rather than from the reference counts.
void StringHasher::SaveDocFile(Base::Writer& writer) const
{
    std::size_t count = 0;
    if (_hashes->SaveAll) {
        count = _hashes->size();
    }
    else {
        for (auto& entry : _hashes->right) {
            if (entry.second->isMarked() || entry.second->isPersistent()) {
                ++count;
            }
        }
    }

    writer.Stream() << "StringTableStart v1 " << count << '\n';
    saveStream(writer.Stream());
}
/// Write the string table records to \a stream, one record per saved entry.
///
/// Entries are written in ascending ID order. Unless SaveAll is set, entries that are neither
/// marked nor persistent are skipped. Each record has the form
///
///     <id>.<flags>[.<sid>]... [<data>] [<postfix>]
///
/// where all numbers are hexadecimal. An <id> starting with '-' is relative to the ID of the
/// previously written record, and the <sid> values of such a record are relative as well (see
/// below). The Marked flag is never written. Every record is terminated by a newline.
///
/// The stream's format flags are restored on return.
void StringHasher::saveStream(std::ostream& stream) const
{
    boost::io::ios_flags_saver ifs(stream);
    stream << std::hex;

    long anchor = 0;
    const StringID* last = nullptr;
    long lastID = 0;
    bool relative = false;

    for (auto& hasher : _hashes->right) {
        auto& d = *hasher.second;
        long id = d._id;
        if (!_hashes->SaveAll && !d.isMarked() && !d.isPersistent()) {
            continue;
        }

        // We use relative coding to save space. But in order to have some
        // minimum protection against corruption, write an absolute value every
        // once a while.
        relative = (id - anchor) < 1000;
        if (relative) {
            stream << '-' << id - lastID;
        }
        else {
            anchor = id;
            stream << id;
        }
        lastID = id;

        // Sanity checks (debug builds only): an entry flagged as having its prefix encoded as an
        // ID must have that ID in its related list, right after the encoded postfix (if any).
        int offset = d.isPostfixEncoded() ? 1 : 0;
        StringID::IndexID prefixID {};
        prefixID.id = 0;
        prefixID.index = 0;
        if (d.isPrefixID()) {
            assert(d._sids.size() > offset);
            prefixID.id = d._sids[offset].value();
        }
        else if (d.isPrefixIDIndex()) {
            prefixID = StringID::fromString(d._data);
            assert(d._sids.size() > offset && d._sids[offset].value() == prefixID.id);
        }

        auto flags = d._flags;
        flags.setFlag(StringID::Flag::Marked, false);
        stream << '.' << flags.toUnderlyingType();

        int position = 0;
        if (!relative) {
            // Absolute record: related IDs are written as they are
            for (; position < d._sids.size(); ++position) {
                stream << '.' << d._sids[position].value();
            }
        }
        else {
            if (last) {
                // As far as the previous record has a related ID at the same position, write
                // the signed difference to that ID
                for (; position < d._sids.size() && position < last->_sids.size(); ++position) {
                    long m = last->_sids[position].value();
                    long n = d._sids[position].value();
                    if (n < m) {
                        stream << ".-" << m - n;
                    }
                    else {
                        stream << '.' << n - m;
                    }
                }
            }
            // Any remaining related IDs are written as the distance from this record's own ID
            for (; position < d._sids.size(); ++position) {
                stream << '.' << id - d._sids[position].value();
            }
        }
        last = &d;

        // Having postfix means it is a geometry element name, which
        // guarantees to be a single line without space. So it is safe to
        // store in raw stream.
        if (d.isPostfixed()) {
            // The data is omitted when it can be rebuilt from the related IDs on restore
            if (!d.isPrefixIDIndex() && !d.isIndexed() && !d.isPrefixID()) {
                stream << ' ' << d._data.constData();
            }
            // Likewise the postfix, when it is encoded as a related ID
            if (!d.isPostfixEncoded()) {
                stream << ' ' << d._postfix.constData();
            }
            stream << '\n';
        }
        else {
            stream << ' ';
            stream << std::dec;
            if (d.isHashed() || d.isBinary()) {
                // Hashed and binary content is arbitrary bytes: write it base64 encoded, which
                // is what restoreStreamNew() decodes for entries carrying these flags
                stream << d._data.toBase64().constData();
            }
            else {
                // NOTE(review): restoreStreamNew() reads this field as a single
                // whitespace-delimited token, so plain text containing spaces or newlines does
                // not survive a round trip in this format.
                stream << d._data.constData();
            }
            // Terminate the record; without this the next record's ID would run into the data
            stream << std::hex << '\n';
        }
    }
}
/// Read the string table from its own file (the counterpart of SaveDocFile()).
///
/// If the content starts with the marker "StringTableStart", it is followed by a version string
/// and the record count, and the records are read with restoreStreamNew(); a version other than
/// "v1" only produces a warning. Otherwise the first token is taken to be the record count of
/// the legacy format, which is read with restoreStream().
void StringHasher::RestoreDocFile(Base::Reader& reader)
{
    std::string marker;
    std::string ver;
    reader >> marker;

    // Use clear() rather than emptying the table directly, so that any existing entries are
    // detached from this hasher and the hasher's reference on them is released.
    clear();

    if (marker == "StringTableStart") {
        std::size_t count = 0;
        reader >> ver >> count;
        if (ver != "v1") {
            FC_WARN("Unknown string table format");
        }
        restoreStreamNew(reader, count);
        return;
    }

    // Legacy format. A negative number must not be converted to an unsigned count, as that
    // would turn it into an enormous record count.
    const int legacyCount = atoi(marker.c_str());
    const std::size_t count = legacyCount > 0 ? static_cast<std::size_t>(legacyCount) : 0U;
    restoreStream(reader, count);
}
/** Read \a count string table records in the format written by saveStream()
 * @param stream: input stream, positioned at the first record
 * @param count: number of records to read
 *
 * Each record starts with a token of '.'-separated hexadecimal fields: the ID (relative to the
 * previous record's ID when it starts with '-'), the flags, and then the related IDs. Depending
 * on the flags, the token is followed by the data and/or the postfix, or these are rebuilt from
 * the related IDs.
 *
 * Throws Base::RuntimeError if a record cannot be read, has fewer than two fields, refers to an
 * ID that is not in the table, or lacks a related ID that its flags require.
 *
 * The table is emptied first, and the stream's format flags are restored on return.
 */
void StringHasher::restoreStreamNew(std::istream& stream, std::size_t count)
{
_hashes->clear();
std::string content;
boost::io::ios_flags_saver ifs(stream);
stream >> std::hex;
std::vector<std::string> tokens;
long lastid = 0;
// The most recently inserted entry: relative related IDs refer to its related-ID list
const StringID* last = nullptr;
std::string tmp;
for (uint32_t i = 0; i < count; ++i) {
if (!(stream >> tmp)) {
FC_THROWM(Base::RuntimeError, "Invalid string table");
}
tokens.clear();
boost::split(tokens, tmp, boost::is_any_of("."));
// At least the ID and the flags must be present
if (tokens.size() < 2) {
FC_THROWM(Base::RuntimeError, "Invalid string table");
}
long id = 0;
bool relative = false;
if (tokens[0][0] == '-') {
// Relative ID: the distance from the previous record's ID
relative = true;
id = lastid + strtol(tokens[0].c_str() + 1, nullptr, 16);
}
else {
id = strtol(tokens[0].c_str(), nullptr, 16);
}
lastid = id;
unsigned long flag = strtol(tokens[1].c_str(), nullptr, 16);
StringIDRef sid(new StringID(id, QByteArray(), static_cast<StringID::Flag>(flag)));
StringID& d = *sid._sid;
d._sids.reserve(tokens.size() - 2);
// Index of the next related-ID token (the first two tokens are the ID and the flags)
int j = 2;
if (relative && last) {
// As far as the previous record has a related ID at the same position, the token is a
// signed difference to that ID
for (; j < (int)tokens.size() && j - 2 < last->_sids.size(); ++j) {
long m = last->_sids[j - 2].value();
long n;
if (tokens[j][0] == '-') {
n = -strtol(&tokens[j][1], nullptr, 16);
}
else {
n = strtol(&tokens[j][0], nullptr, 16);
}
// Note: this local reference deliberately has a narrower scope than the outer 'sid'
StringIDRef sid = getID(m + n);
if (!sid) {
FC_THROWM(Base::RuntimeError, "Invalid string id reference");
}
d._sids.push_back(sid);
}
}
// Remaining tokens: the distance from this record's own ID in a relative record, otherwise
// the related ID itself
for (; j < (int)tokens.size(); ++j) {
long n = strtol(tokens[j].data(), nullptr, 16);
StringIDRef sid = getID(relative ? id - n : n);
if (!sid) {
FC_THROWM(Base::RuntimeError, "Invalid string id reference");
}
d._sids.push_back(sid);
}
if (!d.isPostfixed()) {
// No postfix: the data follows as a single whitespace-delimited token
stream >> content;
if (d.isHashed() || d.isBinary()) {
d._data = QByteArray::fromBase64(content.c_str());
}
else {
d._data = content.c_str();
}
}
else {
// Position of the prefix in the related-ID list: after the encoded postfix, if there is one
int offset = 0;
if (d.isPostfixEncoded()) {
offset = 1;
if (d._sids.empty()) {
FC_THROWM(Base::RuntimeError, "Missing string postfix");
}
// The postfix is the data of the first related ID
d._postfix = d._sids[0]._sid->_data;
}
if (d.isIndexed()) {
if (d._sids.size() <= offset) {
FC_THROWM(Base::RuntimeError, "Missing string prefix");
}
// The data is the text of the related ID that holds the index-less name
d._data = d._sids[offset]._sid->_data;
}
else if (d.isPrefixID() || d.isPrefixIDIndex()) {
if (d._sids.size() <= offset) {
FC_THROWM(Base::RuntimeError, "Missing string prefix id");
}
// The data is the "#<id>" text of the related ID, plus the ':' separator when the
// prefix carries an index
d._data = d._sids[offset]._sid->toString(0).c_str();
if (d.isPrefixIDIndex())
d._data += ":";
}
else {
// The data was written out in full
stream >> content;
d._data = content.c_str();
}
if (!d.isPostfixEncoded()) {
// The postfix was written out in full
stream >> content;
d._postfix = content.c_str();
}
}
last = insert(sid);
}
}
/// Add a StringID that does not yet belong to any hasher to the table.
///
/// @param sid: reference to the StringID to add; must be valid and have no hasher
/// @return the StringID stored in the table at the resulting position. This is the given one if
///         the insertion succeeded, or the entry that prevented the insertion otherwise.
///
/// On success the StringID is attached to this hasher and the hasher holds a reference to it.
StringID* StringHasher::insert(const StringIDRef& sid)
{
    assert(sid && sid._sid->_hasher == nullptr);

    // Tentatively attach the StringID to this hasher and take a reference on it
    StringID* candidate = sid._sid;
    candidate->_hasher = this;
    candidate->ref();

    auto& byID = _hashes->right;
    auto position =
        byID.insert(byID.end(), HashMap::right_map::value_type(sid.value(), candidate));
    StringID* stored = position->second;

    if (stored != candidate) {
        // The table kept a different entry: undo the attachment made above
        candidate->_hasher = nullptr;
        candidate->unref();
    }
    return stored;
}
void StringHasher::restoreStream(std::istream& stream, std::size_t count)
{
_hashes->clear();
std::string content;
for (uint32_t i = 0; i < count; ++i) {
int32_t id = 0;
uint8_t type = 0;
stream >> id >> type >> content;
StringIDRef sid = new StringID(id, QByteArray(), static_cast<StringID::Flag>(type));
if (sid.isHashed() || sid.isBinary()) {
sid._sid->_data = QByteArray::fromBase64(content.c_str());
}
else {
sid._sid->_data = QByteArray(content.c_str());
}
insert(sid);
}
}
void StringHasher::clear()
{
for (auto& hasher : _hashes->right) {
hasher.second->_hasher = nullptr;
hasher.second->unref();
}
_hashes->clear();
}
/// Return the total number of entries in the table.
size_t StringHasher::size() const
{
    const size_t entries = _hashes->size();
    return entries;
}
size_t StringHasher::count() const
{
size_t count = 0;
for (auto& hasher : _hashes->right) {
if (hasher.second->getRefCount() > 1) {
++count;
}
}
return count;
}
/// Read the hasher from the document XML (the counterpart of Save()).
///
/// The existing table is cleared, then the saveall and threshold settings are read from the
/// <StringHasher> element. After that, one of the following happens:
///  - if the current element has a "file" attribute, the table is read later from that file
///    through RestoreDocFile() (nothing is registered if the attribute is empty);
///  - if <StringHasher> carries new="1", the table is read inline from the following
///    <StringHasher2> element with restoreStreamNew();
///  - otherwise the table is in one of the legacy forms: a character stream read with
///    restoreStream() when the file version is greater than 1, or a list of <Item> elements.
void StringHasher::Restore(Base::XMLReader& reader)
{
    clear();
    reader.readElement("StringHasher");
    _hashes->SaveAll = reader.getAttributeAsInteger("saveall") != 0L;
    _hashes->Threshold = static_cast<int>(reader.getAttributeAsInteger("threshold"));

    const bool newTag = reader.hasAttribute("new") && reader.getAttributeAsInteger("new") > 0;
    if (newTag) {
        // From here on, attributes are read from <StringHasher2>
        reader.readElement("StringHasher2");
    }

    if (reader.hasAttribute("file")) {
        const char* file = reader.getAttribute("file");
        if (file[0] != '\0') {
            reader.addFile(file, this);
        }
        return;
    }

    const std::size_t count = reader.getAttributeAsUnsigned("count");
    if (newTag) {
        restoreStreamNew(reader.beginCharStream(), count);
        reader.readEndElement("StringHasher2");
        return;
    }

    const bool legacyStream = (count != 0U) && reader.FileVersion > 1;
    if (legacyStream) {
        restoreStream(reader.beginCharStream(), count);
    }
    else {
        // Oldest form: one <Item> element per entry
        for (std::size_t i = 0; i < count; ++i) {
            reader.readElement("Item");
            const long id = reader.getAttributeAsInteger("id");
            const bool hashed = reader.hasAttribute("hash");

            StringIDRef sid;
            if (hashed || reader.hasAttribute("data")) {
                // Both "hash" and "data" hold base64 text, and both produce a Hashed entry
                const char* value = reader.getAttribute(hashed ? "hash" : "data");
                sid = new StringID(id, QByteArray::fromBase64(value), StringID::Flag::Hashed);
            }
            else {
                sid = new StringID(id, QByteArray(reader.getAttribute("text")));
            }
            insert(sid);
        }
    }
    reader.readEndElement("StringHasher");
}
/// Return a rough size estimate: ten times the number of entries (all of them when SaveAll is
/// set, otherwise only those counted by count()).
unsigned int StringHasher::getMemSize() const
{
    const auto entries = _hashes->SaveAll ? size() : count();
    return entries * 10;
}
/// Create a new Python wrapper (StringHasherPy) around this hasher.
PyObject* StringHasher::getPyObject()
{
    auto* wrapper = new StringHasherPy(this);
    return wrapper;
}
std::map<long, StringIDRef> StringHasher::getIDMap() const
{
std::map<long, StringIDRef> ret;
for (auto& hasher : _hashes->right) {
ret.emplace_hint(ret.end(), hasher.first, StringIDRef(hasher.second));
}
return ret;
}
void StringHasher::clearMarks() const
{
for (auto& hasher : _hashes->right) {
hasher.second->_flags.setFlag(StringID::Flag::Marked, false);
}
}

830
src/App/StringHasher.h Normal file
View File

@@ -0,0 +1,830 @@
// SPDX-License-Identifier: LGPL-2.1-or-later
/***************************************************************************************************
* *
* Copyright (c) 2022 Zheng, Lei (realthunder) <realthunder.dev@gmail.com> *
* Copyright (c) 2023 FreeCAD Project Association *
* *
* This file is part of FreeCAD. *
* *
* FreeCAD is free software: you can redistribute it and/or modify it under the terms of the *
* GNU Lesser General Public License as published by the Free Software Foundation, either *
* version 2.1 of the License, or (at your option) any later version. *
* *
* FreeCAD is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; *
* without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. *
* See the GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License along with *
* FreeCAD. If not, see <https://www.gnu.org/licenses/>. *
* *
**************************************************************************************************/
#ifndef APP_STRING_ID_H
#define APP_STRING_ID_H
#include <bitset>
#include <memory>
#include <QByteArray>
#include <QVector>
#include <Base/Bitmask.h>
#include <Base/Handle.h>
#include <Base/Persistence.h>
#include <CXX/Objects.hxx>
#include <utility>
#include <Base/PyObjectBase.h>
namespace Data
{
class MappedName;
}
namespace App
{
class StringHasher;
class StringID;
class StringIDRef;
using StringHasherRef = Base::Reference<StringHasher>;
/** Class to store a string
*
* The main purpose of this class is to provide an efficient storage of the
* mapped geometry element name (i.e. the new Topological Naming), but it can
* also be used as a general purpose string table.
*
* The StringID is to be stored in a string table (StringHasher), and be
* referred to by an integer ID. The stored data can be optionally divided into
* two parts, prefix and postfix. This is because a new mapped name is often
* created by adding some common postfix to an existing name, so data sharing
* can be improved using the following techniques:
*
* a) reference count (through QByteArray) the main data part,
*
* b) (recursively) encode prefix and/or postfix as an integer (in the
* format of #<hex>, e.g. #1b) that references another StringID,
*
* c) Check index based name in prefix, e.g. Edge1, Vertex2, and encode
* only the text part as StringID. The index is stored separately in
* reference class StringIDRef to maximize data sharing.
*/
class AppExport StringID: public Base::BaseClass, public Base::Handled
{
TYPESYSTEM_HEADER_WITH_OVERRIDE();// NOLINT
public:
/// Flags describing the stored string data
enum class Flag
{
/// No flag
None = 0,
/// The stored data is binary
Binary = 1 << 0,
/// The stored data is the sha1 hash of the original content
Hashed = 1 << 1,
/** Postfix is encoded as #<hex>, e.g. #1b, where the hex integer part
* refers to another StringID.
*/
PostfixEncoded = 1 << 2,
/// The data is split into prefix and postfix
Postfixed = 1 << 3,
/// The prefix data is split into text + index
Indexed = 1 << 4,
/** The prefix data is encoded as #<hex>, e.g. #1b, where the hex
* integer part refers to another StringID.
*/
PrefixID = 1 << 5,
/** The prefix is split into text + index, where the text is encoded
* using another StringID.
*/
PrefixIDIndex = 1 << 6,
/// The string ID is persistent regardless of the internal mark
Persistent = 1 << 7,
/// Internal mark used to check whether the string ID is in use
Marked = 1 << 8,
};
using Flags = Base::Flags<Flag>;
/** Constructor
* @param id: integer ID of this StringID
* @param data: input data
* @param flags: flags describing the data
*
* User code is not supposed to create StringID directly, but through StringHasher::getID()
*/
StringID(long id, QByteArray data, const Flags& flags = Flag::None)
: _id(id),
_data(std::move(data)),
_flags(flags)
{}
/// Constructs an empty StringID (ID zero, no data, no flags)
StringID()
: _id(0),
_flags(Flag::None)
{}
// A StringID can be neither copied nor moved
StringID(const StringID& other) = delete;
StringID(StringID&& other) noexcept = delete;
StringID& operator=(const StringID& rhs) = delete;
StringID& operator=(StringID&& rhs) noexcept = delete;
~StringID() override;
/// Returns the ID of this StringID
long value() const
{
return _id;
}
/// Returns all related StringIDs that are used to encode this StringID
const QVector<StringIDRef>& relatedIDs() const
{
return _sids;
}
/// @name Flag accessors
//@{
bool isBinary() const;
bool isHashed() const;
bool isPostfixed() const;
bool isPostfixEncoded() const;
bool isIndexed() const;
bool isPrefixID() const;
bool isPrefixIDIndex() const;
bool isMarked() const;
bool isPersistent() const;
//@}
/// Checks if this StringID is from the input hasher
bool isFromSameHasher(const StringHasherRef& hasher) const
{
return this->_hasher == hasher;
}
/// Returns the owner hasher
StringHasherRef getHasher() const
{
return {_hasher};
}
/// Returns the data (prefix)
QByteArray data() const
{
return _data;
}
/// Returns the postfix
QByteArray postfix() const
{
return _postfix;
}
/// Sets the postfix
void setPostfix(QByteArray postfix)
{
_postfix = std::move(postfix);
}
PyObject* getPyObject() override;
/// Returns a new Python wrapper of this StringID that also carries the given index
PyObject* getPyObjectWithIndex(int index);
/** Convert to string representation of this StringID
* @param index: optional index
*
* The format is #<id>. And if index is non zero, then #<id>:<index>. Both
* <id> and <index> are in hex format.
*/
std::string toString(int index = 0) const;
/// Lightweight structure holding a string ID and its associated index
struct IndexID
{
long id;
int index;
/// True if the ID is valid, i.e. greater than zero
explicit operator bool() const
{
return id > 0;
}
/// Writes the ID, followed by ':' and the index if the index is non-zero. No '#' prefix is
/// written, and the numbers use the stream's current number base.
friend std::ostream& operator<<(std::ostream& stream, const IndexID& indexID)
{
stream << indexID.id;
if (indexID.index != 0) {
stream << ':' << indexID.index;
}
return stream;
}
};
/** Parse string to get ID and index
* @param name: input string
* @param eof: Whether to check the end of string. If true, then the input
* string must contain only the string representation of this
* StringID
* @param size: input string size, or -1 if the input string is zero terminated.
* @return Return the integer ID and index.
*
* The input string is expected to be in the format of #<id> or with index
* #<id>:<index>, where both id and index are in hex digits.
*/
static IndexID fromString(const char* name, bool eof = true, int size = -1);
/** Parse string to get ID and index
* @param bytes: input data
* @param eof: Whether to check the end of string. If true, then the input
* string must contain only the string representation of this
* StringID
* @return Return the integer ID and index.
*
* The input string is expected to be in the format of #<id> or with index
* #<id>:<index>, where both id and index are in hex digits.
*/
static IndexID fromString(const QByteArray& bytes, bool eof = true)
{
return fromString(bytes.constData(), eof, bytes.size());
}
/** Get the text content of this StringID
* @param index: optional index
* @return Return the text content of this StringID. If the data is binary,
* then output in base64 encoded string.
*/
std::string dataToText(int index = 0) const;
/** Get the content of this StringID as QByteArray
* @param index: optional index.
* @return Return the data, followed by the index in decimal if it is non-zero,
* followed by the postfix if there is one.
*/
QByteArray dataToBytes(int index = 0) const
{
QByteArray res(_data);
if (index != 0) {
res += QByteArray::number(index);
}
if (_postfix.size() != 0) {
res += _postfix;
}
return res;
}
/// Mark this StringID as used
void mark() const;
/// Mark the StringID as persistent regardless of usage mark
void setPersistent(bool enable);
/// Ordering based on compare()
bool operator<(const StringID& other) const
{
return compare(other) < 0;
}
/** Compare StringID
* @param other: the other StringID for comparison
* @return Returns -1 if less than the other StringID, 1 if greater, or 0 if equal
*
* StringIDs are ordered by their owner hasher (pointer value) first, and by their
* integer ID second. The stored data does not take part in the comparison.
*/
int compare(const StringID& other) const
{
if (_hasher < other._hasher) {
return -1;
}
if (_hasher > other._hasher) {
return 1;
}
if (_id < other._id) {
return -1;
}
if (_id > other._id) {
return 1;
}
return 0;
}
friend class StringHasher;
private:
/// Integer ID of this StringID
long _id;
/// The data (prefix)
QByteArray _data;
/// The postfix, empty if there is none
QByteArray _postfix;
/// The owner hasher, or nullptr if this StringID does not belong to one
StringHasher* _hasher = nullptr;
/// Flags describing the data; mutable so that mark() can be const
mutable Flags _flags;
/// Related StringIDs that are used to encode this StringID
mutable QVector<StringIDRef> _sids;
};
//////////////////////////////////////////////////////////////////////////
/** Counted reference to a StringID instance
*/
class StringIDRef
{
public:
/// Default construction results in an empty StringIDRef object: it will evaluate to boolean
/// "false" if queried.
StringIDRef()
: _sid(nullptr),
_index(0)
{}
/// Standard construction from a heap-allocated StringID. This reference-counting class manages
/// the lifetime of the StringID, ensuring it is deallocated when its reference count goes to
/// zero.
/// \param stringID A pointer to a StringID allocated with "new"
/// \param index (optional) An index value to store along with the StringID. Defaults to zero.
StringIDRef(StringID* stringID, int index = 0)
: _sid(stringID),
_index(index)
{
if (_sid) {
_sid->ref();
}
}
/// Copy construction results in an incremented reference count for the stored StringID
StringIDRef(const StringIDRef& other)
: _sid(other._sid),
_index(other._index)
{
if (_sid) {
_sid->ref();
}
}
/// Move construction does NOT increase the reference count of the StringID (instead, it
/// invalidates the pointer in the moved object).
StringIDRef(StringIDRef&& other) noexcept
: _sid(other._sid),
_index(other._index)
{
other._sid = nullptr;
}
StringIDRef(const StringIDRef& other, int index)
: _sid(other._sid),
_index(index)
{
if (_sid) {
_sid->ref();
}
}
~StringIDRef()
{
if (_sid) {
_sid->unref();
}
}
void reset(const StringIDRef& stringID = StringIDRef())
{
*this = stringID;
}
void reset(const StringIDRef& stringID, int index)
{
*this = stringID;
this->_index = index;
}
void swap(StringIDRef& stringID)
{
if (*this != stringID) {
auto tmp = stringID;
stringID = *this;
*this = tmp;
}
}
StringIDRef& operator=(StringID* stringID)
{
if (_sid == stringID) {
return *this;
}
if (_sid) {
_sid->unref();
}
_sid = stringID;
if (_sid) {
_sid->ref();
}
this->_index = 0;
return *this;
}
StringIDRef& operator=(const StringIDRef& stringID)
{
if (&stringID == this) {
return *this;
}
if (_sid != stringID._sid) {
if (_sid) {
_sid->unref();
}
_sid = stringID._sid;
if (_sid) {
_sid->ref();
}
}
this->_index = stringID._index;
return *this;
}
StringIDRef& operator=(StringIDRef&& stringID) noexcept
{
if (_sid != stringID._sid) {
if (_sid) {
_sid->unref();
}
_sid = stringID._sid;
stringID._sid = nullptr;
}
this->_index = stringID._index;
return *this;
}
bool operator<(const StringIDRef& stringID) const
{
if (!stringID._sid) {
return false;
}
if (!_sid) {
return true;
}
int res = _sid->compare(*stringID._sid);
if (res < 0) {
return true;
}
if (res > 0) {
return false;
}
return _index < stringID._index;
}
bool operator==(const StringIDRef& stringID) const
{
if (_sid && stringID._sid) {
return _sid->compare(*stringID._sid) == 0 && _index == stringID._index;
}
return _sid == stringID._sid;
}
bool operator!=(const StringIDRef& stringID) const
{
return !(*this == stringID);
}
explicit operator bool() const
{
return _sid != nullptr;
}
int getRefCount() const
{
if (_sid) {
return _sid->getRefCount();
}
return 0;
}
std::string toString() const
{
if (_sid) {
return _sid->toString(_index);
}
return {};
}
std::string dataToText() const
{
if (_sid) {
return _sid->dataToText(_index);
}
return {};
}
/// Get a reference to the data: only makes sense if index and postfix are both empty, but
/// calling code is responsible for ensuring that.
const char* constData() const
{
if (_sid) {
assert(_index == 0);
assert(_sid->postfix().isEmpty());
return _sid->data().constData();
}
return "";
}
const StringID& deref() const
{
return *_sid;
}
long value() const
{
if (_sid) {
return _sid->value();
}
return 0;
}
QVector<StringIDRef> relatedIDs() const
{
if (_sid) {
return _sid->relatedIDs();
}
return {};
}
bool isBinary() const
{
if (_sid) {
return _sid->isBinary();
}
return false;
}
bool isHashed() const
{
if (_sid) {
return _sid->isHashed();
}
return false;
}
void toBytes(QByteArray& bytes) const
{
if (_sid) {
bytes = _sid->dataToBytes(_index);
}
}
PyObject* getPyObject()
{
if (_sid) {
return _sid->getPyObjectWithIndex(_index);
}
Py_INCREF(Py_None);
return Py_None;
}
void mark() const
{
if (_sid) {
_sid->mark();
}
}
bool isMarked() const
{
return _sid && _sid->isMarked();// NOLINT
}
bool isFromSameHasher(const StringHasherRef& hasher) const
{
return _sid && _sid->isFromSameHasher(hasher);// NOLINT
}
StringHasherRef getHasher() const
{
if (_sid) {
return _sid->getHasher();
}
return {};
}
void setPersistent(bool enable)
{
if (_sid) {
_sid->setPersistent(enable);
}
}
/// Used predominantly by the unit test code to verify that index is set correctly. In general
/// user code should not need to call this function.
int getIndex() const
{
return _index;
}
friend class StringHasher;
private:
StringID* _sid;
int _index;
};
/// \brief A bidirectional map of strings and their integer identifier.
///
/// Maps an arbitrary text string to a unique integer ID, maintaining a reference-counted shared
/// pointer for each. This permits elimination of unused strings based on their reference
/// count. If a duplicate string is added, no additional copy is made, and a new reference to the
/// original storage is returned (incrementing the reference counter of the instance).
///
/// If the string is longer than a given threshold, instead of storing the string, its SHA1 hash is
/// stored (and the original string discarded). This allows an upper threshold on the length of a
/// stored string, while still effectively guaranteeing uniqueness in the table.
class AppExport StringHasher: public Base::Persistence, public Base::Handled
{
    TYPESYSTEM_HEADER_WITH_OVERRIDE();// NOLINT

public:
    StringHasher();
    ~StringHasher() override;

    // A StringHasher can be neither copied nor moved. The copy assignment operator is declared
    // with the canonical const-reference parameter so all four operations are deleted uniformly.
    StringHasher(const StringHasher&) = delete;
    StringHasher(StringHasher&&) noexcept = delete;
    StringHasher& operator=(const StringHasher& other) = delete;
    StringHasher& operator=(StringHasher&& other) noexcept = delete;

    unsigned int getMemSize() const override;
    void Save(Base::Writer& /*writer*/) const override;
    void Restore(Base::XMLReader& /*reader*/) override;
    void SaveDocFile(Base::Writer& /*writer*/) const override;
    void RestoreDocFile(Base::Reader& /*reader*/) override;
    void setPersistenceFileName(const char* name) const;
    const std::string& getPersistenceFileName() const;

    /** Maps an arbitrary string to an integer
     *
     * @param text: input string.
     * @param len: length of the string: optional if the string is null-terminated.
     * @param hashable: whether hashing the string is permitted.
     * @return A shared pointer to the internally-stored StringID.
     *
     * Maps an arbitrary text string to a unique integer ID, returning a reference-counted shared
     * pointer to the StringID. This permits elimination of unused strings based on their reference
     * count. If a duplicate string is added, no additional copy is made, and a new reference to the
     * original storage is returned (incrementing the reference counter of the instance).
     *
     * If \c hashable is true and the string is longer than the threshold setting of this
     * StringHasher, only the SHA1 hash of the string is stored: the original content of the string
     * is discarded. If \c hashable is false, the string is copied and stored inside a StringID
     * instance.
     *
     * The purpose of this function is to provide a short form of a stable string identification.
     */
    StringIDRef getID(const char* text, int len = -1, bool hashable = false);

    /// Options for storing string data
    enum class Option
    {
        /// No option is set
        None = 0,
        /// The input data is binary
        Binary = 1 << 0,
        /// Hashing is permitted for this input data. If the data length is longer than the
        /// threshold setting of the StringHasher, it will be sha1 hashed before storing, and the
        /// original content of the string is discarded.
        Hashable = 1 << 1,
        /// Do not copy the data: assume it is constant and exists for the lifetime of this hasher.
        /// If this option is not set, the data will be copied before storing.
        NoCopy = 1 << 2,
    };
    using Options = Base::Flags<Option>;

    /** Map text or binary data to an integer
     *
     * @param data: input data.
     * @param options: options describing how to store the data.
     * @return A shared pointer to the internally stored StringID.
     *
     * \sa getID (const char*, int, bool);
     */
    StringIDRef getID(const QByteArray& data, Options options = Option::Hashable);

    /** Map geometry element name to an integer */
    StringIDRef getID(const Data::MappedName& name, const QVector<StringIDRef>& sids);

    /** Obtain the reference counted StringID object from numerical id
     *
     * @param id: string ID
     * @param index: optional index of the string ID
     * @return Return a shared pointer to the internally stored StringID.
     *
     * This function exists because the stored string may be one-way hashed,
     * and the original text is not persistent. The caller uses this function to
     * retrieve the reference counted ID object after restore.
     */
    StringIDRef getID(long id, int index = 0) const;

    /** Obtain the reference counted StringID object from numerical id and index
     *
     * @param id: string ID with index
     * @return Return a shared pointer to the internally stored StringID.
     */
    StringIDRef getID(const StringID::IndexID& id) const
    {
        return getID(id.id, id.index);
    }

    std::map<long, StringIDRef> getIDMap() const;

    /// Clear all string hashes
    void clear();

    /// Size of the hash table
    size_t size() const;

    /// Return the number of hashes that are used by others
    size_t count() const;

    PyObject* getPyObject() override;

    /** Enable/disable saving all string ID
     *
     * If saveAll is true, then compact() does nothing even when called explicitly. Setting
     * saveAll to false causes compact() to be run immediately.
     */
    void setSaveAll(bool enable);
    bool getSaveAll() const;

    /** Set threshold of string hashing
     *
     * For hashable strings that are longer than this threshold, the string will
     * be replaced by its sha1 hash.
     */
    void setThreshold(int threshold);
    int getThreshold() const;

    /** Clear internal marks
     *
     * The internal marks on internally stored StringID instances are used to
     * check if the StringID is used.
     */
    void clearMarks() const;

    /// Compact string storage by eliminating unused strings from the table.
    void compact();

    class HashMap;
    friend class StringID;

protected:
    StringID* insert(const StringIDRef& sid);
    long lastID() const;
    void saveStream(std::ostream& stream) const;
    void restoreStream(std::istream& stream, std::size_t count);
    void restoreStreamNew(std::istream& stream, std::size_t count);

private:
    std::unique_ptr<HashMap> _hashes;///< Bidirectional map of StringID and its index (a long int).
    mutable std::string _filename;
};
}// namespace App
ENABLE_BITMASK_OPERATORS(App::StringID::Flag)
ENABLE_BITMASK_OPERATORS(App::StringHasher::Option)
namespace App
{
// Inline flag accessors of StringID. They are defined here, after the
// ENABLE_BITMASK_OPERATORS(App::StringID::Flag) line, and each one simply tests (or sets) a
// single bit of the StringID's _flags member.

/// True if Flag::Binary is set.
inline bool StringID::isBinary() const
{
return _flags.testFlag(Flag::Binary);
}
/// True if Flag::Hashed is set.
inline bool StringID::isHashed() const
{
return _flags.testFlag(Flag::Hashed);
}
/// True if Flag::Postfixed is set.
inline bool StringID::isPostfixed() const
{
return _flags.testFlag(Flag::Postfixed);
}
/// True if Flag::PostfixEncoded is set.
inline bool StringID::isPostfixEncoded() const
{
return _flags.testFlag(Flag::PostfixEncoded);
}
/// True if Flag::Indexed is set.
inline bool StringID::isIndexed() const
{
return _flags.testFlag(Flag::Indexed);
}
/// True if Flag::PrefixID is set.
inline bool StringID::isPrefixID() const
{
return _flags.testFlag(Flag::PrefixID);
}
/// True if Flag::PrefixIDIndex is set.
inline bool StringID::isPrefixIDIndex() const
{
return _flags.testFlag(Flag::PrefixIDIndex);
}
/// True if Flag::Marked is set.
inline bool StringID::isMarked() const
{
return _flags.testFlag(Flag::Marked);
}
/// True if Flag::Persistent is set.
inline bool StringID::isPersistent() const
{
return _flags.testFlag(Flag::Persistent);
}
/// Set (enable == true) or clear (enable == false) Flag::Persistent.
inline void StringID::setPersistent(bool enable)
{
_flags.setFlag(Flag::Persistent, enable);
}
}// namespace App
#endif// APP_STRING_ID_H

View File

@@ -0,0 +1,71 @@
<?xml version="1.0" encoding="UTF-8"?>
<GenerateModel xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:noNamespaceSchemaLocation="generateMetaModel_Module.xsd">
<PythonExport
Father="BaseClassPy"
Name="StringHasherPy"
Twin="StringHasher"
TwinPointer="StringHasher"
Include="App/StringHasher.h"
FatherInclude="Base/BaseClassPy.h"
Namespace="App"
FatherNamespace="Base"
Constructor="true"
Reference="true">
<Documentation>
<Author Licence="LGPL" Name="Zheng, Lei" EMail="realthunder.dev@gmail.com" />
<DeveloperDocu>This is the StringHasher class</DeveloperDocu>
<UserDocu>This is the StringHasher class</UserDocu>
</Documentation>
<Methode Name="getID">
<Documentation>
<UserDocu>
getID(txt|id, base64=False) -> StringID
If the input is text, return a StringID object that is unique within this hasher. This
StringID object is reference counted. The hasher may only save hash ID's that are used.
If the input is an integer, then the hasher will try to find the StringID object stored
with the same integer value.
base64: indicate if the input 'txt' is base64 encoded binary data
</UserDocu>
</Documentation>
</Methode>
<Methode Name="isSame" Const="true">
<Documentation>
<UserDocu>Check if two hashers are the same</UserDocu>
</Documentation>
</Methode>
<Attribute Name="Count" ReadOnly="true">
<Documentation>
<UserDocu>Return count of used hashes</UserDocu>
</Documentation>
<Parameter Name="Count" Type="Int" />
</Attribute>
<Attribute Name="Size" ReadOnly="true">
<Documentation>
<UserDocu>Return the size of the hashes</UserDocu>
</Documentation>
<Parameter Name="Size" Type="Int"/>
</Attribute>
<Attribute Name="SaveAll">
<Documentation>
<UserDocu>Whether to save all string hashes regardless of their use count</UserDocu>
</Documentation>
<Parameter Name="SaveAll" Type="Boolean"/>
</Attribute>
<Attribute Name="Threshold">
<Documentation>
<UserDocu>Data longer than this threshold will be hashed before storing</UserDocu>
</Documentation>
<Parameter Name="Threshold" Type="Int"/>
</Attribute>
<Attribute Name="Table" ReadOnly="true">
<Documentation>
<UserDocu>Return the entire string table as Int->String dictionary</UserDocu>
</Documentation>
<Parameter Name="Table" Type="Dict"/>
</Attribute>
</PythonExport>
</GenerateModel>

View File

@@ -0,0 +1,148 @@
/****************************************************************************
* Copyright (c) 2018 Zheng Lei (realthunder) <realthunder.dev@gmail.com> *
* *
* This file is part of the FreeCAD CAx development system. *
* *
* This library is free software; you can redistribute it and/or *
* modify it under the terms of the GNU Library General Public *
* License as published by the Free Software Foundation; either *
* version 2 of the License, or (at your option) any later version. *
* *
* This library is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Library General Public License for more details. *
* *
* You should have received a copy of the GNU Library General Public *
* License along with this library; see the file COPYING.LIB. If not, *
* write to the Free Software Foundation, Inc., 59 Temple Place, *
* Suite 330, Boston, MA 02111-1307, USA *
* *
****************************************************************************/
#include "PreCompiled.h"
#include "StringHasher.h"
#include "StringHasherPy.h"
#include "StringHasherPy.cpp"
using namespace App;
// Builds the text shown when the object is printed in Python: "<StringHasher at ADDRESS>",
// where ADDRESS is the pointer value of the wrapped StringHasher.
std::string StringHasherPy::representation() const
{
    std::ostringstream repr;
    repr << "<StringHasher at ";
    repr << getStringHasherPtr();
    repr << ">";
    return repr.str();
}
// Python wrapper factory: allocates a fresh StringHasher and wraps it in a new StringHasherPy.
// None of the three arguments is used.
PyObject* StringHasherPy::PyMake(struct _typeobject* /*type*/, PyObject* /*args*/, PyObject* /*kwds*/)
{
    auto* hasher = new StringHasher;
    return new StringHasherPy(hasher);
}
// constructor method
// Ignores both arguments and always returns 0.
// NOTE(review): presumably called by the generated wrapper as the Python __init__ hook, where 0
// means success -- confirm against the generated StringHasherPy.cpp.
int StringHasherPy::PyInit(PyObject* , PyObject* )
{
return 0;
}
// Python: isSame(other) -> bool
// Returns True if 'other' is a StringHasher wrapper around the very same StringHasher instance
// as this object, and False otherwise (including when the argument is not a StringHasher).
PyObject* StringHasherPy::isSame(PyObject *args)
{
    PyObject *other = nullptr;
    if (!PyArg_ParseTuple(args, "O!", &StringHasherPy::Type, &other)) { // convert args: Python->C
        // PyArg_ParseTuple has set a Python exception. A function must not return a valid result
        // while an exception is pending, so discard it before answering "not the same".
        PyErr_Clear();
        return Py::new_reference_to(Py::False());
    }
    auto otherHasher = static_cast<StringHasherPy*>(other)->getStringHasherPtr();
    return Py::new_reference_to(Py::Boolean(getStringHasherPtr() == otherHasher));
}
// Python: getID(id, index=0) -> StringID or None
//         getID(txt, base64=False) -> StringID
//
// Integer form: look up the StringID stored under the given integer id; returns None when no
// such ID exists.
// Text form: map the text to a StringID of this hasher. If 'base64' is true, 'txt' is decoded
// from base64 first and the decoded bytes are stored.
// Raises TypeError if the first argument is neither an integer nor a string.
PyObject* StringHasherPy::getID(PyObject *args)
{
    long id = -1;
    int index = 0;
    PyObject *value = nullptr;
    PyObject *base64 = Py_False;
    if (!PyArg_ParseTuple(args, "l|i",&id,&index)) {
        PyErr_Clear();
        if (!PyArg_ParseTuple(args, "O|O",&value,&base64))
            return nullptr; // nullptr triggers exception
    }
    if(id>0) {
        PY_TRY {
            auto sid = getStringHasherPtr()->getID(id, index);
            if(!sid) Py_Return;
            return sid.getPyObject();
        }PY_CATCH;
    }
    if (!value) {
        // The integer form was parsed successfully but the id is not positive, so no lookup was
        // performed above and there is no text object either. Report "not found" instead of
        // handing a null pointer to the type checks below.
        Py_Return;
    }
    std::string txt;
#if PY_MAJOR_VERSION >= 3
    if (PyUnicode_Check(value)) {
        // PyUnicode_AsUTF8 returns NULL (with an exception set) on failure
        const char* utf8 = PyUnicode_AsUTF8(value);
        if (!utf8)
            return nullptr;
        txt = utf8;
    }
#else
    if (PyUnicode_Check(value)) {
        PyObject* unicode = PyUnicode_AsLatin1String(value);
        if (!unicode)
            return nullptr;
        txt = PyString_AsString(unicode);
        Py_DECREF(unicode);
    }
    else if (PyString_Check(value)) {
        txt = PyString_AsString(value);
    }
#endif
    else
        throw Py::TypeError("expect argument of type string");
    // PyObject_IsTrue returns -1 (with an exception set) if the truth test itself fails
    const int isBase64 = PyObject_IsTrue(base64);
    if (isBase64 < 0)
        return nullptr;
    PY_TRY {
        StringIDRef sid;
        if(isBase64) {
            const QByteArray data = QByteArray::fromBase64(
                QByteArray::fromRawData(txt.c_str(), static_cast<int>(txt.size())));
            // Select the QByteArray overload explicitly. A plain 'true' is not convertible to
            // StringHasher::Options and presumably ended up as the 'len' argument of the
            // (const char*, int, bool) overload instead. The decoded data is binary and may be
            // hashed.
            sid = getStringHasherPtr()->getID(
                data, StringHasher::Option::Binary | StringHasher::Option::Hashable);
        }
        else {
            sid = getStringHasherPtr()->getID(txt.c_str(), static_cast<int>(txt.size()));
        }
        return sid.getPyObject();
    }PY_CATCH;
}
// 'Count' attribute: the value of StringHasher::count(), converted to long.
Py::Int StringHasherPy::getCount() const
{
    const auto used = static_cast<long>(getStringHasherPtr()->count());
    return Py::Int(used);
}
// 'Size' attribute: the value of StringHasher::size(), converted to long.
Py::Int StringHasherPy::getSize() const
{
    const auto tableSize = static_cast<long>(getStringHasherPtr()->size());
    return Py::Int(tableSize);
}
// 'SaveAll' attribute getter: forwards StringHasher::getSaveAll().
Py::Boolean StringHasherPy::getSaveAll() const
{
    const bool saveAll = getStringHasherPtr()->getSaveAll();
    return Py::Boolean(saveAll);
}
// 'SaveAll' attribute setter: forwards the boolean to StringHasher::setSaveAll().
void StringHasherPy::setSaveAll(Py::Boolean value)
{
    const bool enable = value;
    getStringHasherPtr()->setSaveAll(enable);
}
// 'Threshold' attribute getter: the value of StringHasher::getThreshold(), converted to long.
Py::Int StringHasherPy::getThreshold() const
{
    const auto threshold = static_cast<long>(getStringHasherPtr()->getThreshold());
    return Py::Int(threshold);
}
// 'Threshold' attribute setter: forwards the integer to StringHasher::setThreshold().
void StringHasherPy::setThreshold(Py::Int value)
{
    const int threshold = value;
    getStringHasherPtr()->setThreshold(threshold);
}
// 'Table' attribute: a dictionary built from StringHasher::getIDMap(), mapping each integer key
// to the text returned by dataToText() for the associated StringIDRef.
Py::Dict StringHasherPy::getTable() const
{
    Py::Dict table;
    const auto idMap = getStringHasherPtr()->getIDMap();
    for (const auto& entry : idMap) {
        table.setItem(Py::Int(entry.first), Py::String(entry.second.dataToText()));
    }
    return table;
}
// No custom attributes are provided: always returns a null pointer.
PyObject* StringHasherPy::getCustomAttributes(const char* /*attr*/) const
{
    return nullptr;
}
// No custom attributes can be set: both arguments are ignored and 0 is always returned.
// NOTE(review): presumably 0 tells the generated wrapper that the attribute was not handled here
// -- confirm against the generated code.
int StringHasherPy::setCustomAttributes(const char* /*attr*/, PyObject* /*obj*/)
{
return 0;
}

65
src/App/StringIDPy.xml Normal file
View File

@@ -0,0 +1,65 @@
<?xml version="1.0" encoding="UTF-8"?>
<GenerateModel xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:noNamespaceSchemaLocation="generateMetaModel_Module.xsd">
<PythonExport
Father="BaseClassPy"
Name="StringIDPy"
Twin="StringID"
TwinPointer="StringID"
Include="App/StringHasher.h"
FatherInclude="Base/BaseClassPy.h"
Namespace="App"
FatherNamespace="Base"
Reference="true">
<Documentation>
<Author Licence="LGPL" Name="Zheng, Lei" EMail="realthunder.dev@gmail.com" />
<DeveloperDocu>This is the StringID class</DeveloperDocu>
<UserDocu>This is the StringID class</UserDocu>
</Documentation>
<Methode Name="isSame" Const="true">
<Documentation>
<UserDocu>Check if two StringIDs are the same</UserDocu>
</Documentation>
</Methode>
<Attribute Name="Value" ReadOnly="true">
<Documentation>
<UserDocu>Return the integer value of this ID</UserDocu>
</Documentation>
<Parameter Name="Value" Type="Int"/>
</Attribute>
<Attribute Name="Related" ReadOnly="true">
<Documentation>
<UserDocu>Return the related string IDs</UserDocu>
</Documentation>
<Parameter Name="Related" Type="List"/>
</Attribute>
<Attribute Name="Data" ReadOnly="true">
<Documentation>
<UserDocu>Return the data associated with this ID</UserDocu>
</Documentation>
<Parameter Name="Data" Type="String"/>
</Attribute>
<Attribute Name="IsBinary" ReadOnly="true">
<Documentation>
<UserDocu>Check if the data is binary</UserDocu>
</Documentation>
<Parameter Name="IsBinary" Type="Boolean"/>
</Attribute>
<Attribute Name="IsHashed" ReadOnly="true">
<Documentation>
<UserDocu>Check if the data is hashed; if so, 'Data' returns a base64 encoded string of the raw hash</UserDocu>
</Documentation>
<Parameter Name="IsHashed" Type="Boolean"/>
</Attribute>
<Attribute Name="Index" ReadOnly="false">
<Documentation>
<UserDocu>Geometry index. Only meaningful for geometry element name</UserDocu>
</Documentation>
<Parameter Name="Index" Type="Int"/>
</Attribute>
<ClassDeclarations>private:
friend class StringID;
int _index = 0;
</ClassDeclarations>
</PythonExport>
</GenerateModel>

90
src/App/StringIDPyImp.cpp Normal file
View File

@@ -0,0 +1,90 @@
/***************************************************************************
* Copyright (c) 2018 Zheng Lei (realthunder) <realthunder.dev@gmail.com> *
* *
* This file is part of the FreeCAD CAx development system. *
* *
* This library is free software; you can redistribute it and/or *
* modify it under the terms of the GNU Library General Public *
* License as published by the Free Software Foundation; either *
* version 2 of the License, or (at your option) any later version. *
* *
* This library is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Library General Public License for more details. *
* *
* You should have received a copy of the GNU Library General Public *
* License along with this library; see the file COPYING.LIB. If not, *
* write to the Free Software Foundation, Inc., 59 Temple Place, *
* Suite 330, Boston, MA 02111-1307, USA *
* *
****************************************************************************/
#include "PreCompiled.h"
#include "StringHasher.h"
#include "StringIDPy.h"
#include "StringIDPy.cpp"
using namespace App;
// Text shown when the object is printed in Python: the result of StringID::toString() for the
// index stored in this wrapper.
std::string StringIDPy::representation() const
{
    const auto* stringID = getStringIDPtr();
    return stringID->toString(_index);
}
// Python: isSame(other) -> bool
// Returns True if 'other' is a StringID wrapper that refers to the same StringID instance and
// carries the same index as this object, and False otherwise (including when the argument is
// not a StringID).
PyObject* StringIDPy::isSame(PyObject *args)
{
    PyObject *other = nullptr;
    if (PyArg_ParseTuple(args, "O!", &StringIDPy::Type, &other) == 0) { // convert args: Python->C
        // PyArg_ParseTuple has set a Python exception. A function must not return a valid result
        // while an exception is pending, so discard it before answering "not the same".
        PyErr_Clear();
        return Py::new_reference_to(Py::False());
    }
    auto *otherPy = static_cast<StringIDPy*>(other);
    return Py::new_reference_to(Py::Boolean(
        otherPy->getStringIDPtr() == this->getStringIDPtr()
        && otherPy->_index == this->_index));
}
// 'Value' attribute: the result of StringID::value().
Py::Int StringIDPy::getValue() const
{
    const auto* stringID = getStringIDPtr();
    return Py::Int(stringID->value());
}
// 'Related' attribute: a list holding the integer value of every entry of
// StringID::relatedIDs(), in the same order.
Py::List StringIDPy::getRelated() const
{
    Py::List values;
    const auto& related = getStringIDPtr()->relatedIDs();
    for (const auto& ref : related) {
        values.append(Py::Long(ref.value()));
    }
    return values;
}
// 'Data' attribute: the result of StringID::dataToText() for the index stored in this wrapper.
Py::String StringIDPy::getData() const
{
    const auto* stringID = getStringIDPtr();
    return Py::String(stringID->dataToText(this->_index));
}
// 'IsBinary' attribute: forwards StringID::isBinary().
Py::Boolean StringIDPy::getIsBinary() const
{
    const bool binary = getStringIDPtr()->isBinary();
    return {binary};
}
// 'IsHashed' attribute: forwards StringID::isHashed().
Py::Boolean StringIDPy::getIsHashed() const
{
    const bool hashed = getStringIDPtr()->isHashed();
    return {hashed};
}
// 'Index' attribute getter: the index stored in this wrapper object.
Py::Int StringIDPy::getIndex() const
{
    const int index = _index;
    return Py::Int(index);
}
// 'Index' attribute setter: replaces the index stored in this wrapper object. The wrapped
// StringID itself is not touched.
void StringIDPy::setIndex(Py::Int index)
{
    const int newIndex = index;
    _index = newIndex;
}
// No custom attributes are provided: the attribute name is ignored and a null pointer is
// always returned.
PyObject *StringIDPy::getCustomAttributes(const char* /*attr*/) const
{
return nullptr;
}
// No custom attributes can be set: both arguments are ignored and 0 is always returned.
// NOTE(review): presumably 0 tells the generated wrapper that the attribute was not handled here
// -- confirm against the generated code.
int StringIDPy::setCustomAttributes(const char* /*attr*/, PyObject* /*obj*/)
{
return 0;
}

View File

@@ -123,6 +123,9 @@ public:
using u = typename std::underlying_type<Enum>::type;
return static_cast<u>(i) == static_cast<u>(f.i);
}
typename std::underlying_type<Enum>::type toUnderlyingType() const {
return static_cast<typename std::underlying_type<Enum>::type>(i);
}
};
}

View File

@@ -24,6 +24,7 @@
#include "PreCompiled.h"
#ifndef _PreComp_
#include <memory>
# include <xercesc/sax2/XMLReaderFactory.hpp>
#endif
@@ -42,6 +43,7 @@
#include <zipios++/zipios-config.h>
#endif
#include <zipios++/zipinputstream.h>
#include <boost/iostreams/filtering_stream.hpp>
XERCES_CPP_NAMESPACE_USE
@@ -283,6 +285,85 @@ void Base::XMLReader::readCharacters()
{
}
// boost iostream source interface: copy up to n characters of the current character content
// into s and return how many were copied.
//
// Returns -1 immediately if CharacterOffset is negative (no active character data). Otherwise
// characters are taken from the Characters buffer starting at CharacterOffset. When the buffer
// is used up before n characters were delivered, the XML read() is called to fetch more as long
// as the reader is still on character content (ReadType == Chars); once it is not,
// CharacterOffset is set to -1 so that the next call reports -1.
std::streamsize Base::XMLReader::read(char_type* s, std::streamsize n)
{
    if (CharacterOffset < 0) {
        return -1;
    }

    char_type* const start = s;
    while (true) {
        const auto available = static_cast<std::streamsize>(Characters.size()) - CharacterOffset;
        const std::streamsize chunk = (n < available) ? n : available;

        std::memcpy(s, Characters.c_str() + CharacterOffset, chunk);
        s += chunk;
        n -= chunk;
        CharacterOffset += chunk;

        if (n == 0) {
            break;
        }
        if (ReadType != Chars) {
            CharacterOffset = -1;
            break;
        }
        read();
    }
    return s - start;
}
// End the current character stream: CharacterOffset is set to -1 (the state in which
// read(char_type*, std::streamsize) returns -1) and the stream object is destroyed.
// Nothing is checked beforehand, so calling this without an open stream does no harm.
void Base::XMLReader::endCharStream()
{
CharacterOffset = -1;
CharStream.reset();
}
// Return the character stream opened by beginCharStream().
// Throws Base::XMLParseException if there is no open character stream.
std::istream& Base::XMLReader::charStream()
{
    if (CharStream) {
        return *CharStream;
    }
    throw Base::XMLParseException("no current character stream");
}
// Open an input stream over the character content of the current element.
//
// The reader must be positioned on a start element (the following content is read) or on a
// self-closing element (the resulting stream is empty). Throws Base::XMLParseException if a
// character stream is already open or if the reader is in any other state.
std::istream& Base::XMLReader::beginCharStream()
{
    if (CharStream) {
        throw Base::XMLParseException("recursive character stream");
    }

    // TODO: An XML element can actually contain a mix of child elements and
    // characters. So we should not actually demand 'StartElement' here. But
    // with the current implementation of character stream, we cannot track
    // child elements and character content at the same time.
    if (ReadType == StartElement) {
        CharacterOffset = 0;
        read();
    }
    else if (ReadType == StartEndElement) {
        // If we are currently at a self-closing element, just leave the offset
        // as negative and do not read any characters. This will result in an
        // empty input stream for the caller.
        CharacterOffset = -1;
    }
    else {
        throw Base::XMLParseException("invalid state while reading character stream");
    }

    // Set the stream up through a pointer of its concrete type and publish it only once it is
    // complete. This needs no dynamic_cast (whose result was dereferenced unchecked) and leaves
    // CharStream empty should push() throw.
    auto filteringStream = std::make_unique<boost::iostreams::filtering_istream>();
    filteringStream->push(boost::ref(*this));
    CharStream.reset(filteringStream.release());
    return *CharStream;
}
void Base::XMLReader::readBinFile(const char* filename)
{
Base::FileInfo fi(filename);

View File

@@ -33,6 +33,8 @@
#include <xercesc/sax2/Attributes.hpp>
#include <xercesc/sax2/DefaultHandler.hpp>
#include <boost/iostreams/concepts.hpp>
#include "FileInfo.h"
@@ -127,6 +129,13 @@ public:
XMLReader(const char* FileName, std::istream&);
~XMLReader() override;
/** @name boost iostream device interface */
//@{
using category = boost::iostreams::source_tag;
using char_type = char;
std::streamsize read(char_type* s, std::streamsize n);
//@}
bool isValid() const { return _valid; }
bool isVerbose() const { return _verbose; }
void setVerbose(bool on) { _verbose = on; }
@@ -157,6 +166,20 @@ public:
void readEndElement(const char* ElementName=nullptr, int level=-1);
/// read until characters are found
void readCharacters();
/** Obtain an input stream for reading characters
*
* @return Returns an input stream for reading characters. The stream is
* destroyed automatically when you call readElement() or readEndElement(), or
* you can end it explicitly with endCharStream().
*/
std::istream &beginCharStream();
/// Manually end the current character stream
void endCharStream();
/// Obtain the current character stream
std::istream &charStream();
//@}
/// read binary file
void readBinFile(const char*);
//@}
@@ -259,6 +282,7 @@ protected:
std::string LocalName;
std::string Characters;
unsigned int CharacterCount;
std::streamsize CharacterOffset{-1};
std::map<std::string,std::string> AttrMap;
using AttrMapType = std::map<std::string,std::string>;
@@ -285,6 +309,8 @@ protected:
std::vector<std::string> FileNames;
std::bitset<32> StatusBits;
std::unique_ptr<std::istream> CharStream;
};
class BaseExport Reader : public std::istream

View File

@@ -25,6 +25,7 @@
#include <limits>
#include <locale>
#include <iomanip>
#include "Writer.h"
#include "Base64.h"
@@ -34,11 +35,43 @@
#include "Stream.h"
#include "Tools.h"
#include <boost/iostreams/filtering_stream.hpp>
#include <memory>
using namespace Base;
using namespace std;
using namespace zipios;
// boost iostream filter to escape ']]>' in text file saved into CDATA section.
// It does not check if the character is valid utf8 or not.
//
// 'state' is the number of consecutive ']' characters seen immediately before the current
// character, capped at 2. When a '>' arrives in state 2, the sequence "]]><![CDATA[" is written
// in front of it: this closes the current CDATA section after the two brackets and opens a new
// one that starts with the '>'.
struct cdata_filter {
    typedef char char_type;
    typedef boost::iostreams::output_filter_tag category;

    template<typename Device>
    inline bool put(Device& dev, char c) {
        switch(state) {
        case 0:
        case 1:
            if(c == ']')
                ++state;
            else
                state = 0;
            break;
        case 2:
            if(c == '>') {
                static const char escape[] = "]]><![CDATA[";
                boost::iostreams::write(dev,escape,sizeof(escape)-1);
                state = 0;
            }
            else if(c != ']') {
                state = 0;
            }
            // A further ']' keeps the filter in state 2: the output still ends in "]]", so a
            // following '>' (as in "]]]>") must be escaped as well.
            break;
        }
        return boost::iostreams::put(dev,c);
    }

    int state = 0;
};
// ---------------------------------------------------------------------------
// Writer: Constructors and Destructor
@@ -55,6 +88,44 @@ Writer::Writer()
Writer::~Writer() = default;
// Open a CDATA section on the writer's stream and return an output stream for its content.
//
// Everything written to the returned stream passes through cdata_filter before it reaches
// Stream(). The stream is configured to print floating point numbers with
// std::numeric_limits<double>::digits10 + 1 significant digits.
// Throws Base::RuntimeError if a character stream is already open.
std::ostream& Writer::beginCharStream()
{
    if (CharStream) {
        throw Base::RuntimeError("Writer::beginCharStream(): invalid state");
    }

    Stream() << "<![CDATA[";

    // Set the stream up through a pointer of its concrete type and publish it only once it is
    // complete. This needs no dynamic_cast (whose result was dereferenced unchecked).
    auto filteredStream = std::make_unique<boost::iostreams::filtering_ostream>();
    filteredStream->push(cdata_filter());
    filteredStream->push(Stream());
    *filteredStream << std::setprecision(std::numeric_limits<double>::digits10 + 1);
    CharStream.reset(filteredStream.release());
    return *CharStream;
}
// Close the current character stream, if there is one: the stream object is destroyed first and
// the CDATA terminator "]]>" is written to the writer's stream afterwards.
// Returns the writer's regular stream in either case.
std::ostream& Writer::endCharStream()
{
    if (!CharStream) {
        return Stream();
    }
    CharStream.reset();
    Stream() << "]]>";
    return Stream();
}
// Return the character stream opened by beginCharStream().
// Throws Base::RuntimeError if there is no open character stream.
std::ostream& Writer::charStream()
{
    if (!CharStream) {
        // The message used to name endCharStream(), which never throws
        throw Base::RuntimeError("Writer::charStream(): no current character stream");
    }
    return *CharStream;
}
// Write a string as one CDATA section: open a character stream, stream the text into it and
// close the character stream again.
void Writer::insertText(const std::string& s)
{
    std::ostream& cdata = beginCharStream();
    cdata << s;
    endCharStream();
}
void Writer::insertAsciiFile(const char* FileName)
{
Base::FileInfo fi(FileName);

View File

@@ -72,6 +72,8 @@ public:
void insertAsciiFile(const char* FileName);
/// insert a binary file BASE64 coded as CDATA section in the XML file
void insertBinFile(const char* FileName);
/// insert text string as CDATA
void insertText(const std::string &s);
/** @name additional file writing */
//@{
@@ -115,6 +117,23 @@ public:
virtual std::ostream &Stream()=0;
/** Create an output stream for storing character content
* The input is assumed to be valid character with
* the current XML encoding, and will be enclosed inside
* CDATA section. The stream will scan the input and
* properly escape any CDATA ending inside.
* @return Returns an output stream.
*
* You must call endCharStream() to end the current character stream.
*/
std::ostream &beginCharStream();
/** End the current character output stream
* @return Returns the normal writer stream for convenience
*/
std::ostream &endCharStream();
/// Return the current character output stream
std::ostream &charStream();
/// name for underlying file saves
std::string ObjectName;
@@ -138,6 +157,8 @@ protected:
private:
Writer(const Writer&);
Writer& operator=(const Writer&);
std::unique_ptr<std::ostream> CharStream;
};

View File

@@ -37,6 +37,7 @@ endif()
add_executable(Tests_run)
add_subdirectory(lib)
add_subdirectory(src)
target_include_directories(Tests_run PUBLIC ${Python3_INCLUDE_DIRS})
target_link_libraries(Tests_run gtest_main ${Google_Tests_LIBS} FreeCADApp)
add_executable(Sketcher_tests_run)

View File

@@ -1,6 +1,7 @@
target_sources(
Tests_run
PRIVATE
${CMAKE_CURRENT_SOURCE_DIR}/Application.cpp
${CMAKE_CURRENT_SOURCE_DIR}/Branding.cpp
${CMAKE_CURRENT_SOURCE_DIR}/Expression.cpp
${CMAKE_CURRENT_SOURCE_DIR}/ElementMap.cpp
@@ -9,5 +10,5 @@ target_sources(
${CMAKE_CURRENT_SOURCE_DIR}/MappedElement.cpp
${CMAKE_CURRENT_SOURCE_DIR}/MappedName.cpp
${CMAKE_CURRENT_SOURCE_DIR}/Metadata.cpp
${CMAKE_CURRENT_SOURCE_DIR}/Application.cpp
${CMAKE_CURRENT_SOURCE_DIR}/StringHasher.cpp
)

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,30 @@
// SPDX-License-Identifier: LGPL-2.1-or-later
#include "gtest/gtest.h"
#include <Base/Bitmask.h>
// Scoped enumeration used as the template argument of Base::Flags in the test below. No
// underlying type is specified, so it is int. The enumerators take the default values 0, 1, 2.
enum class TestFlagEnum {
Flag1,
Flag2,
Flag3
};
// Test fixture for the Base::Flags tests. It needs no per-test set-up or tear-down at the
// moment; the commented-out overrides are kept as placeholders.
class BitmaskTest: public ::testing::Test
{
protected:
// void SetUp() override {};
// void TearDown() override {};
};
// toUnderlyingType() must return a value of the enum's underlying type, which is int for
// TestFlagEnum.
TEST_F(BitmaskTest, toUnderlyingType)
{
    // Arrange
    const Base::Flags<TestFlagEnum> flags {TestFlagEnum::Flag1};

    // Act
    const auto underlying = flags.toUnderlyingType();

    // Assert
    EXPECT_EQ(typeid(underlying), typeid(int));
}

View File

@@ -1,9 +1,12 @@
target_sources(
Tests_run
PRIVATE
${CMAKE_CURRENT_SOURCE_DIR}/Bitmask.cpp
${CMAKE_CURRENT_SOURCE_DIR}/Matrix.cpp
${CMAKE_CURRENT_SOURCE_DIR}/Rotation.cpp
${CMAKE_CURRENT_SOURCE_DIR}/tst_Tools.cpp
${CMAKE_CURRENT_SOURCE_DIR}/Unit.cpp
${CMAKE_CURRENT_SOURCE_DIR}/Quantity.cpp
${CMAKE_CURRENT_SOURCE_DIR}/Reader.cpp
${CMAKE_CURRENT_SOURCE_DIR}/Rotation.cpp
${CMAKE_CURRENT_SOURCE_DIR}/Unit.cpp
${CMAKE_CURRENT_SOURCE_DIR}/Writer.cpp
${CMAKE_CURRENT_SOURCE_DIR}/tst_Tools.cpp
)

212
tests/src/Base/Reader.cpp Normal file
View File

@@ -0,0 +1,212 @@
// SPDX-License-Identifier: LGPL-2.1-or-later
#include "gtest/gtest.h"
#include "Base/Exception.h"
#include "Base/Reader.h"
#include <array>
#include <filesystem>
#include <fmt/format.h>
#include <fstream>
namespace fs = std::filesystem;
class ReaderTest: public ::testing::Test
{
protected:
void SetUp() override
{
xercesc_3_2::XMLPlatformUtils::Initialize();
_tempDir = fs::temp_directory_path();
std::string filename = "unit_test_Reader.xml";
_tempFile = _tempDir / filename;
}
void TearDown() override
{
if (std::filesystem::exists(_tempFile)) {
std::filesystem::remove(_tempFile);
}
}
void givenDataAsXMLStream(const std::string& data)
{
auto stringData =
R"(<?xml version="1.0" encoding="UTF-8"?><document>)" + data + "</document>";
std::istringstream stream(stringData);
std::ofstream fileStream(_tempFile);
fileStream.write(stringData.data(), static_cast<std::streamsize>(stringData.length()));
fileStream.close();
std::ifstream inputStream(_tempFile);
_reader = std::make_unique<Base::XMLReader>(_tempFile.string().c_str(), inputStream);
}
Base::XMLReader* Reader()
{
return _reader.get();
}
private:
std::unique_ptr<Base::XMLReader> _reader;
fs::path _tempDir;
fs::path _tempFile;
};
// Opening a character stream on an element with text content yields a stream in good state.
TEST_F(ReaderTest, beginCharStreamNormal)
{
    // Arrange
    givenDataAsXMLStream("<data>Test ASCII data</data>");
    Reader()->readElement("data");

    // Act
    std::istream& charStream = Reader()->beginCharStream();

    // Assert
    EXPECT_TRUE(charStream.good());
}
// Opening a character stream on a self-closing element is not an error, even though there is no
// data: the returned stream is in good state.
TEST_F(ReaderTest, beginCharStreamOpenClose)
{
    // Arrange
    givenDataAsXMLStream("<data id='12345' />");
    Reader()->readElement("data");

    // Act
    std::istream& charStream = Reader()->beginCharStream();

    // Assert
    EXPECT_TRUE(charStream.good());
}
/// A second beginCharStream() while a character stream is already open is reported as a parse
/// exception.
TEST_F(ReaderTest, beginCharStreamAlreadyBegun)
{
    // Arrange: open the character stream once
    givenDataAsXMLStream("<data>Test ASCII data</data>");
    auto* const xml = Reader();
    xml->readElement("data");
    xml->beginCharStream();

    // Act & Assert: opening it again must throw
    EXPECT_THROW(xml->beginCharStream(), Base::XMLParseException);
}
/// charStream() gives access to a usable stream once beginCharStream() has been called.
TEST_F(ReaderTest, charStreamGood)
{
    // Arrange: element read and character stream opened
    givenDataAsXMLStream("<data>Test ASCII data</data>");
    auto* const xml = Reader();
    xml->readElement("data");
    xml->beginCharStream();

    // Act
    auto& current = xml->charStream();

    // Assert
    EXPECT_TRUE(current.good());
}
/// charStream() without a preceding beginCharStream() is reported as a parse exception.
TEST_F(ReaderTest, charStreamBad)
{
    // Arrange: element read, but no character stream opened
    givenDataAsXMLStream("<data>Test ASCII data</data>");
    auto* const xml = Reader();
    xml->readElement("data");

    // Act & Assert
    EXPECT_THROW(xml->charStream(), Base::XMLParseException);
}
/// Closing a character stream that was previously opened must not throw.
TEST_F(ReaderTest, endCharStreamGood)
{
    // Arrange
    givenDataAsXMLStream("<data>Test ASCII data</data>");
    Reader()->readElement("data");
    Reader()->beginCharStream();

    // Act & Assert: state the expectation explicitly instead of relying on a bare call, so the
    // test contains a real assertion and a failure is reported at this line.
    EXPECT_NO_THROW(Reader()->endCharStream());
}
/// Closing a character stream that was never opened is tolerated and must not throw.
TEST_F(ReaderTest, endCharStreamBad)
{
    // Arrange
    givenDataAsXMLStream("<data>Test ASCII data</data>");
    Reader()->readElement("data");
    // Do not open the stream...

    // Act & Assert: state the expectation explicitly instead of relying on a bare call, so the
    // test contains a real assertion and a failure is reported at this line.
    EXPECT_NO_THROW(Reader()->endCharStream());
}
/// When the buffer is larger than the element's text, read() delivers the complete text and
/// reports its length.
TEST_F(ReaderTest, readDataSmallerThanBuffer)
{
    // Arrange: 15 characters of content, 20 bytes of buffer
    constexpr size_t capacity {20};
    const std::string payload {"Test ASCII data"};
    givenDataAsXMLStream("<data>" + payload + "</data>");
    auto* const xml = Reader();
    xml->readElement("data");
    xml->beginCharStream();
    // Value-initialized (all zero), so the bytes read are followed by a NUL terminator
    std::array<char, capacity> chunk {};

    // Act
    auto count = xml->read(chunk.data(), capacity);

    // Assert
    EXPECT_STREQ(payload.c_str(), chunk.data());
    EXPECT_EQ(payload.length(), count);
}
/// When the element's text is longer than the buffer, read() fills the whole buffer with the
/// leading part of the text.
TEST_F(ReaderTest, readDataLargerThanBuffer)
{
    // Arrange: 15 characters of content, 5 bytes of buffer
    constexpr size_t capacity {5};
    const std::string payload {"Test ASCII data"};
    givenDataAsXMLStream("<data>" + payload + "</data>");
    auto* const xml = Reader();
    xml->readElement("data");
    xml->beginCharStream();
    std::array<char, capacity> chunk {};

    // Act
    auto count = xml->read(chunk.data(), capacity);

    // Assert: every byte of the buffer matches the corresponding leading byte of the payload
    size_t pos = 0;
    for (const char actual : chunk) {
        EXPECT_EQ(payload[pos], actual);
        ++pos;
    }
    EXPECT_EQ(capacity, count);
}
/// A second read() continues where the first one stopped and delivers the next buffer-sized
/// slice of the element's text.
TEST_F(ReaderTest, readDataLargerThanBufferSecondRead)
{
    // Arrange: 15 characters of content, 5 bytes of buffer, first slice already consumed
    constexpr size_t capacity {5};
    const std::string payload {"Test ASCII data"};
    givenDataAsXMLStream("<data>" + payload + "</data>");
    auto* const xml = Reader();
    xml->readElement("data");
    xml->beginCharStream();
    std::array<char, capacity> chunk {};
    xml->read(chunk.data(), capacity);  // consumes the first five bytes

    // Act: fetch the second five bytes
    auto count = xml->read(chunk.data(), capacity);

    // Assert: the buffer now holds payload[capacity .. 2 * capacity)
    size_t pos = capacity;
    for (const char actual : chunk) {
        EXPECT_EQ(payload[pos], actual);
        ++pos;
    }
    EXPECT_EQ(capacity, count);
}
/// read() without a preceding beginCharStream() reports failure by returning -1.
TEST_F(ReaderTest, readDataNotStarted)
{
    // Arrange: element read, but the character stream is deliberately not opened
    constexpr size_t capacity {20};
    const std::string payload {"Test ASCII data"};
    givenDataAsXMLStream("<data>" + payload + "</data>");
    auto* const xml = Reader();
    xml->readElement("data");
    std::array<char, capacity> chunk {};

    // Act
    auto count = xml->read(chunk.data(), capacity);

    // Assert: -1 because beginCharStream() was never called
    EXPECT_EQ(-1, count);
}

118
tests/src/Base/Writer.cpp Normal file
View File

@@ -0,0 +1,118 @@
// SPDX-License-Identifier: LGPL-2.1-or-later
#include "gtest/gtest.h"
#include "Base/Exception.h"
#include "Base/Writer.h"
/// Fixture for the Writer tests. Writer is meant to be used as a base class, so the tests
/// exercise it through Base::StringWriter, a class derived from it. No per-test set-up or
/// tear-down is required.
class WriterTest: public ::testing::Test
{
protected:
    Base::StringWriter _writer;
};
/// Plain text is emitted wrapped in a single CDATA section.
TEST_F(WriterTest, insertTextSimple)
{
    // Arrange
    std::string payload {"Simple ASCII data"};
    const std::string wrapped = "<![CDATA[" + payload + "]]>";

    // Act
    _writer.insertText(payload);

    // Assert
    EXPECT_EQ(wrapped, _writer.getString());
}
/// Text that itself contains the XML CDATA close marker has to be "escaped": the writer is
/// expected to split the marker across two consecutive CDATA sections, so that the first section
/// ends after "]]" and the second one starts with ">".
TEST_F(WriterTest, insertTextNeedsEscape)
{
    // Arrange: the two halves meet exactly in the middle of the close marker
    std::string beforeSplit {"ASCII data with a close marker in it, like so: ]]"};
    std::string afterSplit {"> "};
    const std::string wrapped = "<![CDATA[" + beforeSplit + "]]><![CDATA[" + afterSplit + "]]>";

    // Act
    _writer.insertText(beforeSplit + afterSplit);

    // Assert
    EXPECT_EQ(wrapped, _writer.getString());
}
/// Control characters and characters outside the ASCII range pass through insertText()
/// unchanged inside a single CDATA section.
TEST_F(WriterTest, insertNonAsciiData)
{
    // Arrange
    // The code point U+1F450 needs the eight-digit \U escape: "\u" consumes exactly four hex
    // digits, so "\u0001F450" would be U+0001 followed by the plain text "F450". The literal is
    // split into adjacent pieces so that no escape can absorb characters of the next one.
    std::string testData {"\x01\x02\x03\x04"
                          "\U0001F450"
                          "😀"};
    std::string expectedResult {"<![CDATA[" + testData + "]]>"};

    // Act
    _writer.insertText(testData);

    // Assert
    EXPECT_EQ(expectedResult, _writer.getString());
}
/// Opening a character stream on a fresh writer yields a stream in a good state.
TEST_F(WriterTest, beginCharStream)
{
    // Arrange & Act
    auto& opened = _writer.beginCharStream();

    // Assert
    EXPECT_TRUE(opened.good());
}
/// Opening a character stream while one is already open raises Base::RuntimeError.
TEST_F(WriterTest, beginCharStreamTwice)
{
    // Arrange: first stream is open
    _writer.beginCharStream();

    // Act & Assert: the second attempt must throw
    EXPECT_THROW(_writer.beginCharStream(), Base::RuntimeError);
}
/// Opening and immediately closing a character stream produces an empty CDATA section.
TEST_F(WriterTest, endCharStream)
{
    // Arrange
    _writer.beginCharStream();

    // Act
    _writer.endCharStream();

    // Assert
    const auto written = _writer.getString();
    EXPECT_EQ("<![CDATA[]]>", written);
}
/// Closing an already-closed character stream is a no-op: the output still consists of exactly
/// one empty CDATA section.
TEST_F(WriterTest, endCharStreamTwice)
{
    // Arrange: a stream that has been opened and closed once
    _writer.beginCharStream();
    _writer.endCharStream();

    // Act: the redundant close neither throws nor writes anything
    _writer.endCharStream();

    // Assert
    const auto written = _writer.getString();
    EXPECT_EQ("<![CDATA[]]>", written);
}
/// charStream() hands back the very same stream object that beginCharStream() returned.
TEST_F(WriterTest, charStream)
{
    // Arrange
    auto& opened = _writer.beginCharStream();

    // Act
    auto& current = _writer.charStream();

    // Assert: identity, not just equality
    EXPECT_EQ(&opened, &current);
}