Merge pull request #9148 from chennes/toponamingStringHasher
App/Toponaming: String hasher
This commit is contained in:
@@ -90,6 +90,8 @@ generate_from_xml(GeoFeatureGroupExtensionPy)
|
||||
generate_from_xml(MetadataPy)
|
||||
generate_from_xml(OriginGroupExtensionPy)
|
||||
generate_from_xml(PartPy)
|
||||
generate_from_xml(StringHasherPy)
|
||||
generate_from_xml(StringIDPy)
|
||||
|
||||
generate_from_xml(ComplexGeoDataPy)
|
||||
generate_from_xml(PropertyContainerPy)
|
||||
@@ -115,6 +117,8 @@ SET(FreeCADApp_XML_SRCS
|
||||
PropertyContainerPy.xml
|
||||
ComplexGeoDataPy.xml
|
||||
MaterialPy.xml
|
||||
StringHasherPy.xml
|
||||
StringIDPy.xml
|
||||
)
|
||||
SOURCE_GROUP("XML" FILES ${FreeCADApp_XML_SRCS})
|
||||
|
||||
@@ -270,6 +274,9 @@ SET(FreeCADApp_CPP_SRCS
|
||||
MaterialPyImp.cpp
|
||||
Metadata.cpp
|
||||
MetadataPyImp.cpp
|
||||
StringHasher.cpp
|
||||
StringHasherPyImp.cpp
|
||||
StringIDPyImp.cpp
|
||||
)
|
||||
|
||||
SET(FreeCADApp_HPP_SRCS
|
||||
@@ -288,6 +295,7 @@ SET(FreeCADApp_HPP_SRCS
|
||||
MappedElement.h
|
||||
Material.h
|
||||
Metadata.h
|
||||
StringHasher.h
|
||||
)
|
||||
|
||||
SET(FreeCADApp_SRCS
|
||||
|
||||
869
src/App/StringHasher.cpp
Normal file
869
src/App/StringHasher.cpp
Normal file
@@ -0,0 +1,869 @@
|
||||
// SPDX-License-Identifier: LGPL-2.1-or-later
|
||||
|
||||
/***************************************************************************************************
|
||||
* *
|
||||
* Copyright (c) 2022 Zheng, Lei (realthunder) <realthunder.dev@gmail.com> *
|
||||
* Copyright (c) 2023 FreeCAD Project Association *
|
||||
* *
|
||||
* This file is part of FreeCAD. *
|
||||
* *
|
||||
* FreeCAD is free software: you can redistribute it and/or modify it under the terms of the *
|
||||
* GNU Lesser General Public License as published by the Free Software Foundation, either *
|
||||
* version 2.1 of the License, or (at your option) any later version. *
|
||||
* *
|
||||
* FreeCAD is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; *
|
||||
* without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. *
|
||||
* See the GNU Lesser General Public License for more details. *
|
||||
* *
|
||||
* You should have received a copy of the GNU Lesser General Public License along with *
|
||||
* FreeCAD. If not, see <https://www.gnu.org/licenses/>. *
|
||||
* *
|
||||
**************************************************************************************************/
|
||||
|
||||
#include "PreCompiled.h"
|
||||
|
||||
#include <QCryptographicHash>
|
||||
#include <QHash>
|
||||
#include <deque>
|
||||
|
||||
#include <Base/Console.h>
|
||||
#include <Base/Reader.h>
|
||||
#include <Base/Stream.h>
|
||||
#include <Base/Writer.h>
|
||||
|
||||
#include <boost/algorithm/string/classification.hpp>
|
||||
#include <boost/algorithm/string/split.hpp>
|
||||
#include <boost/bimap.hpp>
|
||||
#include <boost/bimap/set_of.hpp>
|
||||
#include <boost/bimap/unordered_set_of.hpp>
|
||||
#include <boost/iostreams/stream.hpp>
|
||||
|
||||
#include "MappedElement.h"
|
||||
#include "StringHasher.h"
|
||||
#include "StringHasherPy.h"
|
||||
#include "StringIDPy.h"
|
||||
|
||||
|
||||
FC_LOG_LEVEL_INIT("App", true, true)
|
||||
|
||||
namespace bio = boost::iostreams;
|
||||
using namespace App;
|
||||
|
||||
///////////////////////////////////////////////////////////
|
||||
|
||||
struct StringIDHasher
|
||||
{
|
||||
std::size_t operator()(const StringID* sid) const
|
||||
{
|
||||
if (!sid) {
|
||||
return 0;
|
||||
}
|
||||
return qHash(sid->data(), qHash(sid->postfix()));
|
||||
}
|
||||
|
||||
bool operator()(const StringID* IDa, const StringID* IDb) const
|
||||
{
|
||||
if (IDa == IDb) {
|
||||
return true;
|
||||
}
|
||||
if (!IDa || !IDb) {
|
||||
return false;
|
||||
}
|
||||
return IDa->data() == IDb->data() && IDa->postfix() == IDb->postfix();
|
||||
}
|
||||
};
|
||||
|
||||
using HashMapBase =
|
||||
boost::bimap<boost::bimaps::unordered_set_of<StringID*, StringIDHasher, StringIDHasher>,
|
||||
boost::bimaps::set_of<long>>;
|
||||
|
||||
/// The concrete table type used by StringHasher: the bidirectional map plus the two settings
/// that are saved and restored together with it.
class StringHasher::HashMap: public HashMapBase
{
public:
    /// When set, every entry is written on save and compact() leaves the table untouched
    bool SaveAll {false};
    /// Hashable data longer than this many bytes is stored as its SHA1 digest; a value of zero
    /// or less disables hashing
    int Threshold {0};
};
|
||||
|
||||
///////////////////////////////////////////////////////////
|
||||
|
||||
TYPESYSTEM_SOURCE_ABSTRACT(App::StringID, Base::BaseClass)
|
||||
|
||||
// Removes this StringID's entry from the table of the hasher it is registered with, if any.
StringID::~StringID()
{
    if (!_hasher) {
        return;
    }
    _hasher->_hashes->right.erase(_id);
}
|
||||
|
||||
// Creates a new Python wrapper object around this StringID.
PyObject* StringID::getPyObject()
{
    auto* wrapper = new StringIDPy(this);
    return wrapper;
}
|
||||
|
||||
// Creates a new Python wrapper object around this StringID and stores the given index in it.
PyObject* StringID::getPyObjectWithIndex(int index)
{
    StringIDPy* wrapper = new StringIDPy(this);
    wrapper->_index = index;
    return wrapper;
}
|
||||
|
||||
std::string StringID::toString(int index) const
|
||||
{
|
||||
std::ostringstream ss;
|
||||
ss << '#' << std::hex << value();
|
||||
if (index != 0) {
|
||||
ss << ':' << index;
|
||||
}
|
||||
return ss.str();
|
||||
}
|
||||
|
||||
// Parses text of the form "#<hex id>" or "#<hex id>:<hex index>".
//
// @param name: text to parse; a null pointer yields id == -1
// @param eof: when true, the parse must have consumed the whole input
// @param size: number of bytes to read from name, or negative to use strlen(name)
// @return the parsed id and index; id is -1 if the text is not in the expected format
StringID::IndexID StringID::fromString(const char* name, bool eof, int size)
{
    IndexID parsed {};
    parsed.id = 0;
    parsed.index = 0;
    if (name == nullptr) {
        parsed.id = -1;
        return parsed;
    }

    const int length = size < 0 ? static_cast<int>(std::strlen(name)) : size;
    bio::stream<bio::array_source> input(name, length);

    char hashMark = 0;
    char colon = 0;
    input >> hashMark >> std::hex >> parsed.id >> colon >> parsed.index;

    const bool trailingInput = eof && !input.eof();
    const bool badHashMark = hashMark != '#';
    // colon stays 0 when the text ends right after the id
    const bool badSeparator = colon != 0 && colon != ':';
    if (trailingInput || badHashMark || badSeparator) {
        parsed.id = -1;
    }
    return parsed;
}
|
||||
|
||||
std::string StringID::dataToText(int index) const
|
||||
{
|
||||
if (isHashed() || isBinary()) {
|
||||
return _data.toBase64().constData();
|
||||
}
|
||||
|
||||
std::string res(_data.constData());
|
||||
if (index != 0) {
|
||||
res += std::to_string(index);
|
||||
}
|
||||
if (_postfix.size() != 0) {
|
||||
res += _postfix.constData();
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
void StringID::mark() const
|
||||
{
|
||||
if (isMarked()) {
|
||||
return;
|
||||
}
|
||||
_flags.setFlag(Flag::Marked);
|
||||
for (auto& sid : _sids) {
|
||||
sid.deref().mark();
|
||||
}
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////
|
||||
|
||||
TYPESYSTEM_SOURCE(App::StringHasher, Base::Persistence)
|
||||
|
||||
// Constructs a hasher with an empty table and default settings.
StringHasher::StringHasher()
    : _hashes(new HashMap())
{}
|
||||
|
||||
// Detaches every stored StringID from this hasher before the table goes away.
StringHasher::~StringHasher()
{
    this->clear();
}
|
||||
|
||||
void StringHasher::setSaveAll(bool enable)
|
||||
{
|
||||
if (_hashes->SaveAll == enable) {
|
||||
return;
|
||||
}
|
||||
_hashes->SaveAll = enable;
|
||||
compact();
|
||||
}
|
||||
|
||||
void StringHasher::compact()
|
||||
{
|
||||
if (_hashes->SaveAll) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Make a list of all the table entries that have only a single reference and are not marked
|
||||
// "persistent"
|
||||
std::deque<StringIDRef> pendings;
|
||||
for (auto& hasher : _hashes->right) {
|
||||
if (!hasher.second->isPersistent() && hasher.second->getRefCount() == 1) {
|
||||
pendings.emplace_back(hasher.second);
|
||||
}
|
||||
}
|
||||
|
||||
// Recursively remove the unused StringIDs
|
||||
while (!pendings.empty()) {
|
||||
StringIDRef sid = pendings.front();
|
||||
pendings.pop_front();
|
||||
// Try to erase the map entry for this StringID
|
||||
if (_hashes->right.erase(sid.value()) == 0U) {
|
||||
continue;// If nothing was erased, there's nothing more to do
|
||||
}
|
||||
sid._sid->_hasher = nullptr;
|
||||
sid._sid->unref();
|
||||
for (auto& hasher : sid._sid->_sids) {
|
||||
if (hasher._sid->_hasher == this && !hasher._sid->isPersistent()
|
||||
&& hasher._sid->getRefCount() == 2) {
|
||||
// If the related StringID also uses this hasher, is not marked persistent, and has
|
||||
// a current reference count of 2 (which will be its hasher reference and its entry
|
||||
// in the related SIDs list), then prep it for removal as well.
|
||||
pendings.push_back(hasher);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
bool StringHasher::getSaveAll() const
|
||||
{
|
||||
return _hashes->SaveAll;
|
||||
}
|
||||
|
||||
void StringHasher::setThreshold(int threshold)
|
||||
{
|
||||
_hashes->Threshold = threshold;
|
||||
}
|
||||
|
||||
int StringHasher::getThreshold() const
|
||||
{
|
||||
return _hashes->Threshold;
|
||||
}
|
||||
|
||||
long StringHasher::lastID() const
|
||||
{
|
||||
if (_hashes->right.empty()) {
|
||||
return 0;
|
||||
}
|
||||
auto it = _hashes->right.end();
|
||||
--it;
|
||||
return it->first;
|
||||
}
|
||||
|
||||
// Looks up or creates the StringID for a C string.
//
// @param text: the string data; a null pointer with a negative len is treated as an empty string
// @param len: number of bytes in text, or negative to use strlen(text)
// @param hashable: whether the data may be replaced by its hash if longer than the threshold
// @return a reference to the matching (possibly newly inserted) StringID
StringIDRef StringHasher::getID(const char* text, int len, bool hashable)
{
    if (len < 0) {
        // strlen() on a null pointer is undefined behaviour, so map null to length zero
        len = text ? static_cast<int>(strlen(text)) : 0;
    }
    // fromRawData() does not copy; the QByteArray overload makes its own deep copy before
    // storing the data because Option::NoCopy is not passed here.
    return getID(QByteArray::fromRawData(text, len), hashable ? Option::Hashable : Option::None);
}
|
||||
|
||||
// Looks up or creates the StringID for a block of bytes.
//
// @param data: the bytes to store
// @param options: Binary marks the entry as binary, Hashable allows storing the SHA1 digest
//        instead of the data when it is longer than the threshold, NoCopy stores `data` as-is
//        instead of a deep copy
// @return a reference to the existing entry with the same content, or to a newly inserted one
StringIDRef StringHasher::getID(const QByteArray& data, Options options)
{
    const bool wantBinary = options.testFlag(Option::Binary);
    const bool mayHash = options.testFlag(Option::Hashable);
    const bool skipCopy = options.testFlag(Option::NoCopy);

    const int threshold = _hashes->Threshold;
    const bool useHash = mayHash && threshold > 0 && static_cast<int>(data.size()) > threshold;

    // Temporary key used only for the table lookup
    StringID key;
    if (useHash) {
        QCryptographicHash sha1(QCryptographicHash::Sha1);
        sha1.addData(data);
        key._data = sha1.result();
    }
    else {
        key._data = data;
    }

    const auto found = _hashes->left.find(&key);
    if (found != _hashes->left.end()) {
        return {found->first};
    }

    if (!(useHash || skipCopy)) {
        // The caller's buffer is not hashed and may not be kept: take a deep copy
        key._data = QByteArray(data.constData(), data.size());
    }

    StringID::Flags flags(StringID::Flag::None);
    if (wantBinary) {
        flags.setFlag(StringID::Flag::Binary);
    }
    if (useHash) {
        flags.setFlag(StringID::Flag::Hashed);
    }

    StringIDRef created(new StringID(lastID() + 1, key._data, flags));
    return {insert(created)};
}
|
||||
|
||||
// Looks up or creates the StringID for a mapped element name.
//
// @param name: the mapped name, split by its own accessors into data and postfix bytes
// @param sids: StringIDs related to the name; only those registered with this hasher are kept
// @return a reference to the existing or newly inserted StringID, carrying the index of the
//         name when the data part was recognised as "<type><index>" or "#<id>:<index>"
StringIDRef StringHasher::getID(const Data::MappedName& name, const QVector<StringIDRef>& sids)
{
    // Temporary key used for the lookup and as scratch space for data/postfix
    StringID probe;
    probe._postfix = name.postfixBytes();

    Data::IndexedName indexedName;
    if (probe._postfix.size() != 0) {
        // Only check for IndexedName if there is postfix, because of the way
        // we restore the StringID. See StringHasher::saveStream/restoreStreamNew()
        indexedName = Data::IndexedName(name.dataBytes());
    }

    if (indexedName) {
        // For an IndexedName only the type part is stored as data; the integer index is not
        const char* typeName = indexedName.getType();
        probe._data = QByteArray::fromRawData(typeName, static_cast<int>(strlen(typeName)));
    }
    else {
        // Whole name as data; for now this shares the memory of the input
        probe._data = name.dataBytes();
    }

    // Return the existing entry if the table already has one for this data/postfix pair
    const auto found = _hashes->left.find(&probe);
    if (found != _hashes->left.end()) {
        auto existing = StringIDRef(found->first);
        if (indexedName) {
            existing._index = indexedName.getIndex();
        }
        return existing;
    }

    if (!indexedName && name.isRaw()) {
        // The data is about to be stored, so detach it from the caller's raw memory
        const QByteArray rawBytes = name.dataBytes();
        probe._data = QByteArray(rawBytes.constData(), rawBytes.size());
    }

    // A postfix that does not contain '#' is stored as its own StringID and replaced by the
    // byte form of that reference
    StringIDRef postfixRef;
    const bool hasPostfix = probe._postfix.size() != 0;
    if (hasPostfix && probe._postfix.indexOf("#") < 0) {
        postfixRef = getID(probe._postfix);
        postfixRef.toBytes(probe._postfix);
    }

    // The type part of an IndexedName is stored as its own StringID too
    StringIDRef indexRef;
    if (indexedName) {
        indexRef = getID(probe._data);
    }

    // Build the entry that will actually be inserted
    StringIDRef createdRef(new StringID(lastID() + 1, probe._data));
    StringID& created = *createdRef._sid;
    if (probe._postfix.size() != 0) {
        created._flags.setFlag(StringID::Flag::Postfixed);
        created._postfix = probe._postfix;
    }

    // How many of the caller's related IDs belong to this hasher?
    int ownedCount = 0;
    for (const auto& candidate : sids) {
        if (candidate && candidate._sid->_hasher == this) {
            ++ownedCount;
        }
    }

    int addedCount = 0;
    if (postfixRef) {
        ++addedCount;
    }
    if (indexRef) {
        ++addedCount;
    }

    if (ownedCount == sids.size() && !postfixRef && !indexRef) {
        // Nothing to filter out and nothing to prepend: take the list as it is
        created._sids = sids;
    }
    else {
        // Postfix and index references come first, then the caller's IDs owned by this hasher
        created._sids.reserve(ownedCount + addedCount);
        if (postfixRef) {
            created._flags.setFlag(StringID::Flag::PostfixEncoded);
            created._sids.push_back(postfixRef);
        }
        if (indexRef) {
            created._flags.setFlag(StringID::Flag::Indexed);
            created._sids.push_back(indexRef);
        }
        for (const auto& candidate : sids) {
            if (candidate && candidate._sid->_hasher == this) {
                created._sids.push_back(candidate);
            }
        }
    }

    // Long related-ID lists are sorted and de-duplicated, leaving the prepended references at
    // the front untouched. The limit is hardcoded to 10.
    const int relatedIDSizeThreshold {10};
    if (created._sids.size() > relatedIDSizeThreshold) {
        const auto firstSortable = created._sids.begin() + addedCount;
        std::sort(firstSortable, created._sids.end());
        created._sids.erase(std::unique(created._sids.begin() + addedCount, created._sids.end()),
                            created._sids.end());
    }

    // Postfixed but not an IndexedName: the data itself may be of the form "#<id>" or
    // "#<id>:<index>", referring to another StringID
    if ((created._postfix.size() != 0) && !indexedName) {
        const StringID::IndexID prefix = StringID::fromString(created._data);
        if (prefix.id > 0) {
            const bool prefixHasIndex = prefix.index != 0;
            if (prefixHasIndex) {
                // Keep the index in the returned reference and cut it off the stored data,
                // leaving the text up to and including the ':'
                indexedName.setIndex(prefix.index);
                created._data.resize(created._data.lastIndexOf(':') + 1);
            }

            // The referenced StringID has to sit right after the encoded postfix (if any)
            const int slot = created.isPostfixEncoded() ? 1 : 0;
            for (int pos = slot; pos < created._sids.size(); ++pos) {
                if (created._sids[pos].value() != prefix.id) {
                    continue;
                }
                if (pos != slot) {
                    std::swap(created._sids[slot], created._sids[pos]);
                }
                if (prefixHasIndex) {
                    created._flags.setFlag(StringID::Flag::PrefixIDIndex);
                }
                else {
                    created._flags.setFlag(StringID::Flag::PrefixID);
                }
                break;
            }
        }
    }

    return {insert(createdRef), indexedName.getIndex()};
}
|
||||
|
||||
// Looks up an existing entry by its integer ID.
//
// @param id: the ID to find; zero and negative values never match
// @param index: index to store in the returned reference
// @return a reference to the entry, or an empty reference if there is no entry with that ID
StringIDRef StringHasher::getID(long id, int index) const
{
    if (id > 0) {
        const auto found = _hashes->right.find(id);
        if (found != _hashes->right.end()) {
            StringIDRef ref(found->second);
            ref._index = index;
            return ref;
        }
    }
    return {};
}
|
||||
|
||||
void StringHasher::setPersistenceFileName(const char* filename) const
|
||||
{
|
||||
if (!filename) {
|
||||
filename = "";
|
||||
}
|
||||
_filename = filename;
|
||||
}
|
||||
|
||||
const std::string& StringHasher::getPersistenceFileName() const
|
||||
{
|
||||
return _filename;
|
||||
}
|
||||
|
||||
// Writes the hasher to the document XML.
//
// A <StringHasher> element always carries the saveall and threshold settings. If there is
// anything to save, it is followed by a <StringHasher2> element that either names a separate
// file (when a persistence file name is set) or contains the table inline as character data.
void StringHasher::Save(Base::Writer& writer) const
{
    // Number of entries that will be written: all of them, or only marked/persistent ones
    size_t total = 0;
    if (_hashes->SaveAll) {
        total = _hashes->size();
    }
    else {
        for (auto& entry : _hashes->right) {
            const bool keep = entry.second->isMarked() || entry.second->isPersistent();
            if (keep) {
                ++total;
            }
        }
    }

    writer.Stream() << writer.ind();
    writer.Stream() << "<StringHasher saveall=\"" << _hashes->SaveAll;
    writer.Stream() << "\" threshold=\"" << _hashes->Threshold << "\"";

    if (total == 0U) {
        writer.Stream() << " count=\"0\"></StringHasher>\n";
        return;
    }

    // count stays "0" here; the "new" attribute tells Restore() to read <StringHasher2> next
    writer.Stream() << " count=\"0\" new=\"1\"/>\n";

    writer.Stream() << writer.ind() << "<StringHasher2 ";
    if (!_filename.empty()) {
        // Table goes to a separate file that is written later through SaveDocFile()
        writer.Stream() << " file=\"";
        writer.Stream() << writer.addFile((_filename + ".txt").c_str(), this) << "\"/>\n";
        return;
    }

    writer.Stream() << " count=\"" << total << "\">\n";
    saveStream(writer.beginCharStream() << '\n');
    writer.endCharStream() << '\n';
    writer.Stream() << writer.ind();
    writer.Stream() << "</StringHasher2>\n";
}
|
||||
|
||||
// Writes the table to its separate document file: a "StringTableStart v1 <count>" header line
// followed by the records produced by saveStream().
void StringHasher::SaveDocFile(Base::Writer& writer) const
{
    // The header count must equal the number of records saveStream() emits, because
    // restoreStreamNew() reads exactly that many. saveStream() writes every entry when SaveAll
    // is set and otherwise only marked or persistent ones, so count with the same rule here
    // (count() uses the reference count instead and can disagree).
    std::size_t count = 0;
    if (_hashes->SaveAll) {
        count = _hashes->size();
    }
    else {
        for (const auto& entry : _hashes->right) {
            if (entry.second->isMarked() || entry.second->isPersistent()) {
                ++count;
            }
        }
    }
    writer.Stream() << "StringTableStart v1 " << count << '\n';
    saveStream(writer.Stream());
}
|
||||
|
||||
void StringHasher::saveStream(std::ostream& stream) const
|
||||
{
|
||||
boost::io::ios_flags_saver ifs(stream);
|
||||
stream << std::hex;
|
||||
|
||||
long anchor = 0;
|
||||
const StringID* last = nullptr;
|
||||
long lastID = 0;
|
||||
bool relative = false;
|
||||
|
||||
for (auto& hasher : _hashes->right) {
|
||||
auto& d = *hasher.second;
|
||||
long id = d._id;
|
||||
if (!_hashes->SaveAll && !d.isMarked() && !d.isPersistent()) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// We use relative coding to save space. But in order to have some
|
||||
// minimum protection against corruption, write an absolute value every
|
||||
// once a while.
|
||||
relative = (id - anchor) < 1000;
|
||||
if (relative) {
|
||||
stream << '-' << id - lastID;
|
||||
}
|
||||
else {
|
||||
anchor = id;
|
||||
stream << id;
|
||||
}
|
||||
lastID = id;
|
||||
|
||||
int offset = d.isPostfixEncoded() ? 1 : 0;
|
||||
|
||||
StringID::IndexID prefixID {};
|
||||
prefixID.id = 0;
|
||||
prefixID.index = 0;
|
||||
if (d.isPrefixID()) {
|
||||
assert(d._sids.size() > offset);
|
||||
prefixID.id = d._sids[offset].value();
|
||||
}
|
||||
else if (d.isPrefixIDIndex()) {
|
||||
prefixID = StringID::fromString(d._data);
|
||||
assert(d._sids.size() > offset && d._sids[offset].value() == prefixID.id);
|
||||
}
|
||||
|
||||
auto flags = d._flags;
|
||||
flags.setFlag(StringID::Flag::Marked, false);
|
||||
stream << '.' << flags.toUnderlyingType();
|
||||
|
||||
int position = 0;
|
||||
if (!relative) {
|
||||
for (; position < d._sids.size(); ++position) {
|
||||
stream << '.' << d._sids[position].value();
|
||||
}
|
||||
}
|
||||
else {
|
||||
if (last) {
|
||||
for (; position < d._sids.size() && position < last->_sids.size(); ++position) {
|
||||
long m = last->_sids[position].value();
|
||||
long n = d._sids[position].value();
|
||||
if (n < m) {
|
||||
stream << ".-" << m - n;
|
||||
}
|
||||
else {
|
||||
stream << '.' << n - m;
|
||||
}
|
||||
}
|
||||
}
|
||||
for (; position < d._sids.size(); ++position) {
|
||||
stream << '.' << id - d._sids[position].value();
|
||||
}
|
||||
}
|
||||
|
||||
last = &d;
|
||||
|
||||
// Having postfix means it is a geometry element name, which
|
||||
// guarantees to be a single line without space. So it is safe to
|
||||
// store in raw stream.
|
||||
if (d.isPostfixed()) {
|
||||
if (!d.isPrefixIDIndex() && !d.isIndexed() && !d.isPrefixID()) {
|
||||
stream << ' ' << d._data.constData();
|
||||
}
|
||||
|
||||
if (!d.isPostfixEncoded()) {
|
||||
stream << ' ' << d._postfix.constData();
|
||||
}
|
||||
stream << '\n';
|
||||
}
|
||||
else {
|
||||
// Reaching here means the string may contain space and newlines
|
||||
stream << ' ';
|
||||
stream << std::dec << d._data.constData() << std::hex;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Restores the table from its separate document file.
//
// A file starting with "StringTableStart" is in the current format (version and record count
// follow, then the records). Otherwise the first token is taken as the record count of the
// legacy format.
void StringHasher::RestoreDocFile(Base::Reader& reader)
{
    std::string marker;
    std::string ver;
    reader >> marker;
    std::size_t count = 0;
    // Use clear() rather than clearing the map directly so that any entries still in the table
    // are detached from this hasher and the reference held on them is released.
    clear();
    if (marker == "StringTableStart") {
        reader >> ver >> count;
        if (ver != "v1") {
            FC_WARN("Unknown string table format");
        }
        restoreStreamNew(reader, count);
        return;
    }
    // Legacy format: guard against a negative number, which would otherwise wrap around to a
    // huge unsigned record count.
    const int legacyCount = atoi(marker.c_str());
    count = legacyCount > 0 ? static_cast<std::size_t>(legacyCount) : 0;
    restoreStream(reader, count);
}
|
||||
|
||||
void StringHasher::restoreStreamNew(std::istream& stream, std::size_t count)
|
||||
{
|
||||
_hashes->clear();
|
||||
std::string content;
|
||||
boost::io::ios_flags_saver ifs(stream);
|
||||
stream >> std::hex;
|
||||
std::vector<std::string> tokens;
|
||||
long lastid = 0;
|
||||
const StringID* last = nullptr;
|
||||
|
||||
std::string tmp;
|
||||
|
||||
for (uint32_t i = 0; i < count; ++i) {
|
||||
if (!(stream >> tmp)) {
|
||||
FC_THROWM(Base::RuntimeError, "Invalid string table");
|
||||
}
|
||||
|
||||
tokens.clear();
|
||||
boost::split(tokens, tmp, boost::is_any_of("."));
|
||||
if (tokens.size() < 2) {
|
||||
FC_THROWM(Base::RuntimeError, "Invalid string table");
|
||||
}
|
||||
|
||||
long id = 0;
|
||||
bool relative = false;
|
||||
if (tokens[0][0] == '-') {
|
||||
relative = true;
|
||||
id = lastid + strtol(tokens[0].c_str() + 1, nullptr, 16);
|
||||
}
|
||||
else {
|
||||
id = strtol(tokens[0].c_str(), nullptr, 16);
|
||||
}
|
||||
|
||||
lastid = id;
|
||||
|
||||
unsigned long flag = strtol(tokens[1].c_str(), nullptr, 16);
|
||||
StringIDRef sid(new StringID(id, QByteArray(), static_cast<StringID::Flag>(flag)));
|
||||
|
||||
StringID& d = *sid._sid;
|
||||
d._sids.reserve(tokens.size() - 2);
|
||||
|
||||
int j = 2;
|
||||
if (relative && last) {
|
||||
for (; j < (int)tokens.size() && j - 2 < last->_sids.size(); ++j) {
|
||||
long m = last->_sids[j - 2].value();
|
||||
long n;
|
||||
if (tokens[j][0] == '-') {
|
||||
n = -strtol(&tokens[j][1], nullptr, 16);
|
||||
}
|
||||
else {
|
||||
n = strtol(&tokens[j][0], nullptr, 16);
|
||||
}
|
||||
StringIDRef sid = getID(m + n);
|
||||
if (!sid) {
|
||||
FC_THROWM(Base::RuntimeError, "Invalid string id reference");
|
||||
}
|
||||
d._sids.push_back(sid);
|
||||
}
|
||||
}
|
||||
for (; j < (int)tokens.size(); ++j) {
|
||||
long n = strtol(tokens[j].data(), nullptr, 16);
|
||||
StringIDRef sid = getID(relative ? id - n : n);
|
||||
if (!sid) {
|
||||
FC_THROWM(Base::RuntimeError, "Invalid string id reference");
|
||||
}
|
||||
d._sids.push_back(sid);
|
||||
}
|
||||
|
||||
if (!d.isPostfixed()) {
|
||||
stream >> content;
|
||||
if (d.isHashed() || d.isBinary()) {
|
||||
d._data = QByteArray::fromBase64(content.c_str());
|
||||
}
|
||||
else {
|
||||
d._data = content.c_str();
|
||||
}
|
||||
}
|
||||
else {
|
||||
int offset = 0;
|
||||
if (d.isPostfixEncoded()) {
|
||||
offset = 1;
|
||||
if (d._sids.empty()) {
|
||||
FC_THROWM(Base::RuntimeError, "Missing string postfix");
|
||||
}
|
||||
d._postfix = d._sids[0]._sid->_data;
|
||||
}
|
||||
if (d.isIndexed()) {
|
||||
if (d._sids.size() <= offset) {
|
||||
FC_THROWM(Base::RuntimeError, "Missing string prefix");
|
||||
}
|
||||
d._data = d._sids[offset]._sid->_data;
|
||||
}
|
||||
else if (d.isPrefixID() || d.isPrefixIDIndex()) {
|
||||
if (d._sids.size() <= offset) {
|
||||
FC_THROWM(Base::RuntimeError, "Missing string prefix id");
|
||||
}
|
||||
d._data = d._sids[offset]._sid->toString(0).c_str();
|
||||
if (d.isPrefixIDIndex())
|
||||
d._data += ":";
|
||||
}
|
||||
else {
|
||||
stream >> content;
|
||||
d._data = content.c_str();
|
||||
}
|
||||
if (!d.isPostfixEncoded()) {
|
||||
stream >> content;
|
||||
d._postfix = content.c_str();
|
||||
}
|
||||
}
|
||||
|
||||
last = insert(sid);
|
||||
}
|
||||
}
|
||||
|
||||
// Registers a StringID with this hasher and adds it to the table under its ID.
//
// @param sid: a non-empty reference to a StringID that does not belong to any hasher yet
// @return the StringID stored in the table under that ID; this differs from the given one if
//         the ID was already taken, in which case the given StringID is left unregistered
StringID* StringHasher::insert(const StringIDRef& sid)
{
    assert(sid && sid._sid->_hasher == nullptr);

    StringID& candidate = *sid._sid;
    candidate._hasher = this;
    candidate.ref();// the table's own reference

    // New IDs are normally the largest so far, hence the end() hint
    const auto position =
        _hashes->right.insert(_hashes->right.end(),
                              HashMap::right_map::value_type(sid.value(), &candidate));
    StringID* stored = position->second;
    if (stored != &candidate) {
        // Not inserted: undo the registration done above
        candidate._hasher = nullptr;
        candidate.unref();
    }
    return stored;
}
|
||||
|
||||
void StringHasher::restoreStream(std::istream& stream, std::size_t count)
|
||||
{
|
||||
_hashes->clear();
|
||||
std::string content;
|
||||
for (uint32_t i = 0; i < count; ++i) {
|
||||
int32_t id = 0;
|
||||
uint8_t type = 0;
|
||||
stream >> id >> type >> content;
|
||||
StringIDRef sid = new StringID(id, QByteArray(), static_cast<StringID::Flag>(type));
|
||||
if (sid.isHashed() || sid.isBinary()) {
|
||||
sid._sid->_data = QByteArray::fromBase64(content.c_str());
|
||||
}
|
||||
else {
|
||||
sid._sid->_data = QByteArray(content.c_str());
|
||||
}
|
||||
insert(sid);
|
||||
}
|
||||
}
|
||||
|
||||
void StringHasher::clear()
|
||||
{
|
||||
for (auto& hasher : _hashes->right) {
|
||||
hasher.second->_hasher = nullptr;
|
||||
hasher.second->unref();
|
||||
}
|
||||
_hashes->clear();
|
||||
}
|
||||
|
||||
size_t StringHasher::size() const
|
||||
{
|
||||
return _hashes->size();
|
||||
}
|
||||
|
||||
size_t StringHasher::count() const
|
||||
{
|
||||
size_t count = 0;
|
||||
for (auto& hasher : _hashes->right) {
|
||||
if (hasher.second->getRefCount() > 1) {
|
||||
++count;
|
||||
}
|
||||
}
|
||||
return count;
|
||||
}
|
||||
|
||||
// Restores the hasher from the document XML written by Save(), and from the older layouts:
//  - <StringHasher new="1"/> followed by <StringHasher2>, with the table either in a separate
//    file ("file" attribute) or inline as character data;
//  - <StringHasher count="N"> with the table inline as character data (file version > 1);
//  - <StringHasher count="N"> with one <Item> child element per entry.
void StringHasher::Restore(Base::XMLReader& reader)
{
    clear();

    reader.readElement("StringHasher");
    _hashes->SaveAll = reader.getAttributeAsInteger("saveall") != 0L;
    _hashes->Threshold = static_cast<int>(reader.getAttributeAsInteger("threshold"));

    // The "new" attribute announces a following <StringHasher2> element; all attributes read
    // below then come from that element.
    const bool newTag = reader.hasAttribute("new") && reader.getAttributeAsInteger("new") > 0;
    if (newTag) {
        reader.readElement("StringHasher2");
    }

    if (reader.hasAttribute("file")) {
        // Table lives in a separate file that is read later through RestoreDocFile()
        const char* file = reader.getAttribute("file");
        if (file[0] != '\0') {
            reader.addFile(file, this);
        }
        return;
    }

    const std::size_t count = reader.getAttributeAsUnsigned("count");
    if (newTag) {
        restoreStreamNew(reader.beginCharStream(), count);
        reader.readEndElement("StringHasher2");
        return;
    }

    const bool legacyStream = (count != 0U) && reader.FileVersion > 1;
    if (legacyStream) {
        restoreStream(reader.beginCharStream(), count);
    }
    else {
        for (std::size_t item = 0; item < count; ++item) {
            reader.readElement("Item");
            StringIDRef sid;
            const long id = reader.getAttributeAsInteger("id");
            const bool hashed = reader.hasAttribute("hash");
            const bool encoded = hashed || reader.hasAttribute("data");
            if (!encoded) {
                sid = new StringID(id, QByteArray(reader.getAttribute("text")));
            }
            else {
                // NOTE(review): entries read from a "data" attribute are flagged Hashed as
                // well - confirm whether Binary was intended for them.
                const char* value = nullptr;
                if (hashed) {
                    value = reader.getAttribute("hash");
                }
                else {
                    value = reader.getAttribute("data");
                }
                sid = new StringID(id, QByteArray::fromBase64(value), StringID::Flag::Hashed);
            }
            insert(sid);
        }
    }
    reader.readEndElement("StringHasher");
}
|
||||
|
||||
unsigned int StringHasher::getMemSize() const
|
||||
{
|
||||
return (_hashes->SaveAll ? size() : count()) * 10;
|
||||
}
|
||||
|
||||
// Creates a new Python wrapper object around this hasher.
PyObject* StringHasher::getPyObject()
{
    auto* wrapper = new StringHasherPy(this);
    return wrapper;
}
|
||||
|
||||
std::map<long, StringIDRef> StringHasher::getIDMap() const
|
||||
{
|
||||
std::map<long, StringIDRef> ret;
|
||||
for (auto& hasher : _hashes->right) {
|
||||
ret.emplace_hint(ret.end(), hasher.first, StringIDRef(hasher.second));
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
void StringHasher::clearMarks() const
|
||||
{
|
||||
for (auto& hasher : _hashes->right) {
|
||||
hasher.second->_flags.setFlag(StringID::Flag::Marked, false);
|
||||
}
|
||||
}
|
||||
830
src/App/StringHasher.h
Normal file
830
src/App/StringHasher.h
Normal file
@@ -0,0 +1,830 @@
|
||||
// SPDX-License-Identifier: LGPL-2.1-or-later
|
||||
|
||||
/***************************************************************************************************
|
||||
* *
|
||||
* Copyright (c) 2022 Zheng, Lei (realthunder) <realthunder.dev@gmail.com> *
|
||||
* Copyright (c) 2023 FreeCAD Project Association *
|
||||
* *
|
||||
* This file is part of FreeCAD. *
|
||||
* *
|
||||
* FreeCAD is free software: you can redistribute it and/or modify it under the terms of the *
|
||||
* GNU Lesser General Public License as published by the Free Software Foundation, either *
|
||||
* version 2.1 of the License, or (at your option) any later version. *
|
||||
* *
|
||||
* FreeCAD is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; *
|
||||
* without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. *
|
||||
* See the GNU Lesser General Public License for more details. *
|
||||
* *
|
||||
* You should have received a copy of the GNU Lesser General Public License along with *
|
||||
* FreeCAD. If not, see <https://www.gnu.org/licenses/>. *
|
||||
* *
|
||||
**************************************************************************************************/
|
||||
|
||||
#ifndef APP_STRING_ID_H
|
||||
#define APP_STRING_ID_H
|
||||
|
||||
#include <bitset>
|
||||
#include <memory>
|
||||
|
||||
#include <QByteArray>
|
||||
#include <QVector>
|
||||
|
||||
#include <Base/Bitmask.h>
|
||||
#include <Base/Handle.h>
|
||||
#include <Base/Persistence.h>
|
||||
#include <CXX/Objects.hxx>
|
||||
#include <utility>
|
||||
|
||||
#include <Base/PyObjectBase.h>
|
||||
|
||||
|
||||
namespace Data
|
||||
{
|
||||
class MappedName;
|
||||
}
|
||||
|
||||
namespace App
|
||||
{
|
||||
|
||||
class StringHasher;
|
||||
class StringID;
|
||||
class StringIDRef;
|
||||
using StringHasherRef = Base::Reference<StringHasher>;
|
||||
|
||||
/** Class to store a string
|
||||
*
|
||||
* The main purpose of this class is to provide an efficient storage of the
|
||||
* mapped geometry element name (i.e. the new Topological Naming), but it can
|
||||
* also be used as a general purpose string table.
|
||||
*
|
||||
* The StringID is to be stored in a string table (StringHasher), and be
|
||||
* referred to by an integer ID. The stored data can be optionally divided into
|
||||
* two parts, prefix and postfix. This is because a new mapped name is often
|
||||
* created by adding some common postfix to an existing name, so data sharing
|
||||
* can be improved using the following techniques:
|
||||
*
|
||||
* a) reference count (through QByteArray) the main data part,
|
||||
*
|
||||
* b) (recursively) encode prefix and/or postfix as an integer (in the
|
||||
* format of #<hex>, e.g. #1b) that references another StringID,
|
||||
*
|
||||
* c) Check index based name in prefix, e.g. Edge1, Vertex2, and encode
|
||||
* only the text part as StringID. The index is stored separately in
|
||||
* reference class StringIDRef to maximize data sharing.
|
||||
*/
|
||||
class AppExport StringID: public Base::BaseClass, public Base::Handled
|
||||
{
|
||||
TYPESYSTEM_HEADER_WITH_OVERRIDE();// NOLINT
|
||||
|
||||
public:
|
||||
/// Flag of the stored string data
|
||||
enum class Flag
|
||||
{
|
||||
/// No flag
|
||||
None = 0,
|
||||
/// The stored data is binary
|
||||
Binary = 1 << 0,
|
||||
/// The stored data is the sha1 hash of the original content
|
||||
Hashed = 1 << 1,
|
||||
/** Postfix is encoded as #<hex>, e.g. #1b, where the hex integer part
|
||||
* refers to another StringID.
|
||||
*/
|
||||
PostfixEncoded = 1 << 2,
|
||||
/// The data is split as prefix and postfix
|
||||
Postfixed = 1 << 3,
|
||||
/// The prefix data is split as text + index
|
||||
Indexed = 1 << 4,
|
||||
/** The prefix data is encoded as #<hex>, e.g. #1b, where the hex
|
||||
* integer part refers to another StringID.
|
||||
*/
|
||||
PrefixID = 1 << 5,
|
||||
/** The prefix split as text + index, where the text is encoded
|
||||
* using another StringID.
|
||||
*/
|
||||
PrefixIDIndex = 1 << 6,
|
||||
/// The string ID is persistent regardless of internal mark
|
||||
Persistent = 1 << 7,
|
||||
/// Internal mark used to check whether the string ID is in use
|
||||
Marked = 1 << 8,
|
||||
};
|
||||
using Flags = Base::Flags<Flag>;
|
||||
|
||||
/** Constructor
|
||||
* @param id: integer ID of this StringID
|
||||
* @param data: input data
|
||||
* @param flags: flags describing the data
|
||||
*
|
||||
* User code is not supposed to create StringID directly, but through StringHasher::getID()
|
||||
*/
|
||||
StringID(long id, QByteArray data, const Flags& flags = Flag::None)
|
||||
: _id(id),
|
||||
_data(std::move(data)),
|
||||
_flags(flags)
|
||||
{}
|
||||
|
||||
/// Constructs an empty StringID
|
||||
StringID()
|
||||
: _id(0),
|
||||
_flags(Flag::None)
|
||||
{}
|
||||
|
||||
StringID(const StringID& other) = delete;
|
||||
StringID(StringID&& other) noexcept = delete;
|
||||
StringID& operator=(const StringID& rhs) = delete;
|
||||
StringID& operator=(StringID&& rhs) noexcept = delete;
|
||||
|
||||
~StringID() override;
|
||||
|
||||
/// Returns the ID of this StringID
|
||||
long value() const
|
||||
{
|
||||
return _id;
|
||||
}
|
||||
|
||||
/// Returns all related StringIDs that are used to encode this StringID
|
||||
const QVector<StringIDRef>& relatedIDs() const
|
||||
{
|
||||
return _sids;
|
||||
}
|
||||
|
||||
/// @name Flag accessors
|
||||
//@{
|
||||
bool isBinary() const;
|
||||
bool isHashed() const;
|
||||
bool isPostfixed() const;
|
||||
bool isPostfixEncoded() const;
|
||||
bool isIndexed() const;
|
||||
bool isPrefixID() const;
|
||||
bool isPrefixIDIndex() const;
|
||||
bool isMarked() const;
|
||||
bool isPersistent() const;
|
||||
//@}
|
||||
|
||||
/// Checks if this StringID is from the input hasher
|
||||
bool isFromSameHasher(const StringHasherRef& hasher) const
|
||||
{
|
||||
return this->_hasher == hasher;
|
||||
}
|
||||
|
||||
/// Returns the owner hasher
|
||||
StringHasherRef getHasher() const
|
||||
{
|
||||
return {_hasher};
|
||||
}
|
||||
|
||||
/// Returns the data (prefix)
|
||||
QByteArray data() const
|
||||
{
|
||||
return _data;
|
||||
}
|
||||
|
||||
/// Returns the postfix
|
||||
QByteArray postfix() const
|
||||
{
|
||||
return _postfix;
|
||||
}
|
||||
|
||||
/// Sets the postfix
|
||||
void setPostfix(QByteArray postfix)
|
||||
{
|
||||
_postfix = std::move(postfix);
|
||||
}
|
||||
|
||||
PyObject* getPyObject() override;
|
||||
/// Returns a Python tuple containing both the text and index
|
||||
PyObject* getPyObjectWithIndex(int index);
|
||||
|
||||
/** Convert to string representation of this StringID
|
||||
* @param index: optional index
|
||||
*
|
||||
* The format is #<id>. And if index is non zero, then #<id>:<index>. Both
|
||||
* <id> and <index> are in hex format.
|
||||
*/
|
||||
std::string toString(int index = 0) const;
|
||||
|
||||
/// Lightweight structure holding a string ID and its associated index
|
||||
struct IndexID
{
    /// Numeric string ID; values <= 0 mean "no ID" (see operator bool).
    long id = 0;
    /// Optional index attached to the ID; 0 means "no index".
    int index = 0;

    /// True when this holds a usable (strictly positive) string ID.
    explicit operator bool() const
    {
        return id > 0;
    }

    /// Streams "<id>" or, when the index is non-zero, "<id>:<index>".
    /// Values are written with the stream's current formatting (no base is forced here).
    friend std::ostream& operator<<(std::ostream& stream, const IndexID& indexID)
    {
        stream << indexID.id;
        if (indexID.index != 0) {
            stream << ':' << indexID.index;
        }
        return stream;
    }
};
|
||||
|
||||
/** Parse string to get ID and index
|
||||
* @param name: input string
|
||||
* @param eof: Whether to check the end of string. If true, then the input
|
||||
* string must contain only the string representation of this
|
||||
* StringID
|
||||
* @param size: input string size, or -1 if the input string is zero terminated.
|
||||
* @return Return the integer ID and index.
|
||||
*
|
||||
* The input string is expected to be in the format of #<id> or with index
|
||||
* #<id>:<index>, where both id and index are in hex digits.
|
||||
*/
|
||||
static IndexID fromString(const char* name, bool eof = true, int size = -1);
|
||||
|
||||
/** Parse string to get ID and index
|
||||
* @param bytes: input data
|
||||
* @param eof: Whether to check the end of string. If true, then the input
|
||||
* string must contain only the string representation of this
|
||||
* StringID
|
||||
*
|
||||
* The input string is expected to be in the format of #<id> or with index
|
||||
* #<id>:<index>, where both id and index are in hex digits.
|
||||
*/
|
||||
static IndexID fromString(const QByteArray& bytes, bool eof = true)
|
||||
{
|
||||
return fromString(bytes.constData(), eof, bytes.size());
|
||||
}
|
||||
|
||||
/** Get the text content of this StringID
|
||||
* @param index: optional index
|
||||
* @return Return the text content of this StringID. If the data is binary,
|
||||
* then output in base64 encoded string.
|
||||
*/
|
||||
std::string dataToText(int index = 0) const;
|
||||
|
||||
/** Get the content of this StringID as QByteArray
|
||||
* @param index: optional index.
|
||||
*/
|
||||
QByteArray dataToBytes(int index = 0) const
|
||||
{
|
||||
QByteArray res(_data);
|
||||
if (index != 0) {
|
||||
res += QByteArray::number(index);
|
||||
}
|
||||
if (_postfix.size() != 0) {
|
||||
res += _postfix;
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
/// Mark this StringID as used
|
||||
void mark() const;
|
||||
|
||||
/// Mark the StringID as persistent regardless of usage mark
|
||||
void setPersistent(bool enable);
|
||||
|
||||
bool operator<(const StringID& other) const
|
||||
{
|
||||
return compare(other) < 0;
|
||||
}
|
||||
|
||||
/** Compare StringID
|
||||
* @param other: the other StringID for comparison
|
||||
* @return Returns -1 if less than the other StringID, 1 if greater, or 0 if equal
|
||||
*/
|
||||
int compare(const StringID& other) const
|
||||
{
|
||||
if (_hasher < other._hasher) {
|
||||
return -1;
|
||||
}
|
||||
if (_hasher > other._hasher) {
|
||||
return 1;
|
||||
}
|
||||
if (_id < other._id) {
|
||||
return -1;
|
||||
}
|
||||
if (_id > other._id) {
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
friend class StringHasher;
|
||||
|
||||
private:
|
||||
long _id;
|
||||
QByteArray _data;
|
||||
QByteArray _postfix;
|
||||
StringHasher* _hasher = nullptr;
|
||||
mutable Flags _flags;
|
||||
mutable QVector<StringIDRef> _sids;
|
||||
};
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
|
||||
/** Counted reference to a StringID instance
|
||||
*/
|
||||
class StringIDRef
|
||||
{
|
||||
public:
|
||||
/// Default construction results in an empty StringIDRef object: it will evaluate to boolean
|
||||
/// "false" if queried.
|
||||
StringIDRef()
|
||||
: _sid(nullptr),
|
||||
_index(0)
|
||||
{}
|
||||
|
||||
/// Standard construction from a heap-allocated StringID. This reference-counting class manages
|
||||
/// the lifetime of the StringID, ensuring it is deallocated when its reference count goes to
|
||||
/// zero.
|
||||
/// \param stringID A pointer to a StringID allocated with "new"
|
||||
/// \param index (optional) An index value to store along with the StringID. Defaults to zero.
|
||||
StringIDRef(StringID* stringID, int index = 0)
|
||||
: _sid(stringID),
|
||||
_index(index)
|
||||
{
|
||||
if (_sid) {
|
||||
_sid->ref();
|
||||
}
|
||||
}
|
||||
|
||||
/// Copy construction results in an incremented reference count for the stored StringID
|
||||
StringIDRef(const StringIDRef& other)
|
||||
: _sid(other._sid),
|
||||
_index(other._index)
|
||||
{
|
||||
if (_sid) {
|
||||
_sid->ref();
|
||||
}
|
||||
}
|
||||
|
||||
/// Move construction does NOT increase the reference count of the StringID (instead, it
|
||||
/// invalidates the pointer in the moved object).
|
||||
StringIDRef(StringIDRef&& other) noexcept
|
||||
: _sid(other._sid),
|
||||
_index(other._index)
|
||||
{
|
||||
other._sid = nullptr;
|
||||
}
|
||||
|
||||
StringIDRef(const StringIDRef& other, int index)
|
||||
: _sid(other._sid),
|
||||
_index(index)
|
||||
{
|
||||
if (_sid) {
|
||||
_sid->ref();
|
||||
}
|
||||
}
|
||||
|
||||
~StringIDRef()
|
||||
{
|
||||
if (_sid) {
|
||||
_sid->unref();
|
||||
}
|
||||
}
|
||||
|
||||
void reset(const StringIDRef& stringID = StringIDRef())
|
||||
{
|
||||
*this = stringID;
|
||||
}
|
||||
|
||||
void reset(const StringIDRef& stringID, int index)
|
||||
{
|
||||
*this = stringID;
|
||||
this->_index = index;
|
||||
}
|
||||
|
||||
/// Exchange the referenced StringID and the index with another reference.
///
/// Both the pointer and the index are exchanged member-wise, so no reference
/// count is touched and the operation cannot fail. Swapping with itself is a no-op.
void swap(StringIDRef& stringID) noexcept
{
    StringID* const heldSid = _sid;
    const int heldIndex = _index;

    _sid = stringID._sid;
    _index = stringID._index;

    stringID._sid = heldSid;
    stringID._index = heldIndex;
}
|
||||
|
||||
/// Rebind this reference to a raw StringID pointer (may be null).
///
/// The index is always reset to zero, matching construction from a raw pointer
/// with the default index, including when the same StringID is assigned again.
StringIDRef& operator=(StringID* stringID)
{
    if (_sid != stringID) {
        // Acquire the new reference before releasing the old one, so the
        // incoming object stays alive even if the old one was its only owner.
        if (stringID) {
            stringID->ref();
        }
        StringID* const outgoing = _sid;
        _sid = stringID;
        if (outgoing) {
            outgoing->unref();
        }
    }
    this->_index = 0;
    return *this;
}
|
||||
|
||||
/// Copy assignment: share the StringID held by another reference and copy its index.
StringIDRef& operator=(const StringIDRef& stringID)
{
    if (&stringID == this) {
        return *this;
    }

    // Read everything from the source first: releasing our current StringID
    // may destroy objects it owns, which could include the source reference.
    StringID* const incoming = stringID._sid;
    const int incomingIndex = stringID._index;

    if (_sid != incoming) {
        if (incoming) {
            incoming->ref();
        }
        StringID* const outgoing = _sid;
        _sid = incoming;
        if (outgoing) {
            outgoing->unref();
        }
    }
    this->_index = incomingIndex;
    return *this;
}
|
||||
|
||||
/// Move assignment: take over the other reference's StringID without touching its
/// reference count, and copy its index. If both already refer to the same StringID,
/// only the index is copied and the source keeps its own reference.
StringIDRef& operator=(StringIDRef&& stringID) noexcept
{
    // Capture the source state before releasing anything we currently hold.
    StringID* const incoming = stringID._sid;
    const int incomingIndex = stringID._index;

    if (_sid != incoming) {
        StringID* const outgoing = _sid;
        _sid = incoming;
        stringID._sid = nullptr;
        if (outgoing) {
            outgoing->unref();
        }
    }
    this->_index = incomingIndex;
    return *this;
}
|
||||
|
||||
bool operator<(const StringIDRef& stringID) const
|
||||
{
|
||||
if (!stringID._sid) {
|
||||
return false;
|
||||
}
|
||||
if (!_sid) {
|
||||
return true;
|
||||
}
|
||||
int res = _sid->compare(*stringID._sid);
|
||||
if (res < 0) {
|
||||
return true;
|
||||
}
|
||||
if (res > 0) {
|
||||
return false;
|
||||
}
|
||||
return _index < stringID._index;
|
||||
}
|
||||
|
||||
bool operator==(const StringIDRef& stringID) const
|
||||
{
|
||||
if (_sid && stringID._sid) {
|
||||
return _sid->compare(*stringID._sid) == 0 && _index == stringID._index;
|
||||
}
|
||||
return _sid == stringID._sid;
|
||||
}
|
||||
|
||||
bool operator!=(const StringIDRef& stringID) const
|
||||
{
|
||||
return !(*this == stringID);
|
||||
}
|
||||
|
||||
explicit operator bool() const
|
||||
{
|
||||
return _sid != nullptr;
|
||||
}
|
||||
|
||||
int getRefCount() const
|
||||
{
|
||||
if (_sid) {
|
||||
return _sid->getRefCount();
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
std::string toString() const
|
||||
{
|
||||
if (_sid) {
|
||||
return _sid->toString(_index);
|
||||
}
|
||||
return {};
|
||||
}
|
||||
|
||||
std::string dataToText() const
|
||||
{
|
||||
if (_sid) {
|
||||
return _sid->dataToText(_index);
|
||||
}
|
||||
return {};
|
||||
}
|
||||
|
||||
/// Get a reference to the data: only makes sense if index and postfix are both empty, but
|
||||
/// calling code is responsible for ensuring that.
|
||||
const char* constData() const
|
||||
{
|
||||
if (_sid) {
|
||||
assert(_index == 0);
|
||||
assert(_sid->postfix().isEmpty());
|
||||
return _sid->data().constData();
|
||||
}
|
||||
return "";
|
||||
}
|
||||
|
||||
/// Access the referenced StringID.
/// Precondition: this reference is not empty; callers must check with operator bool first.
const StringID& deref() const
{
    assert(_sid != nullptr);
    return *_sid;
}
|
||||
|
||||
long value() const
|
||||
{
|
||||
if (_sid) {
|
||||
return _sid->value();
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
QVector<StringIDRef> relatedIDs() const
|
||||
{
|
||||
if (_sid) {
|
||||
return _sid->relatedIDs();
|
||||
}
|
||||
return {};
|
||||
}
|
||||
|
||||
bool isBinary() const
|
||||
{
|
||||
if (_sid) {
|
||||
return _sid->isBinary();
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
bool isHashed() const
|
||||
{
|
||||
if (_sid) {
|
||||
return _sid->isHashed();
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
void toBytes(QByteArray& bytes) const
|
||||
{
|
||||
if (_sid) {
|
||||
bytes = _sid->dataToBytes(_index);
|
||||
}
|
||||
}
|
||||
|
||||
PyObject* getPyObject()
|
||||
{
|
||||
if (_sid) {
|
||||
return _sid->getPyObjectWithIndex(_index);
|
||||
}
|
||||
Py_INCREF(Py_None);
|
||||
return Py_None;
|
||||
}
|
||||
|
||||
void mark() const
|
||||
{
|
||||
if (_sid) {
|
||||
_sid->mark();
|
||||
}
|
||||
}
|
||||
|
||||
bool isMarked() const
|
||||
{
|
||||
return _sid && _sid->isMarked();// NOLINT
|
||||
}
|
||||
|
||||
bool isFromSameHasher(const StringHasherRef& hasher) const
|
||||
{
|
||||
return _sid && _sid->isFromSameHasher(hasher);// NOLINT
|
||||
}
|
||||
|
||||
StringHasherRef getHasher() const
|
||||
{
|
||||
if (_sid) {
|
||||
return _sid->getHasher();
|
||||
}
|
||||
return {};
|
||||
}
|
||||
|
||||
void setPersistent(bool enable)
|
||||
{
|
||||
if (_sid) {
|
||||
_sid->setPersistent(enable);
|
||||
}
|
||||
}
|
||||
|
||||
/// Used predominantly by the unit test code to verify that index is set correctly. In general
|
||||
/// user code should not need to call this function.
|
||||
int getIndex() const
|
||||
{
|
||||
return _index;
|
||||
}
|
||||
|
||||
friend class StringHasher;
|
||||
|
||||
private:
|
||||
StringID* _sid;
|
||||
int _index;
|
||||
};
|
||||
|
||||
|
||||
/// \brief A bidirectional map of strings and their integer identifier.
|
||||
///
|
||||
/// Maps an arbitrary text string to a unique integer ID, maintaining a reference-counted shared
|
||||
/// pointer for each. This permits elimination of unused strings based on their reference
|
||||
/// count. If a duplicate string is added, no additional copy is made, and a new reference to the
|
||||
/// original storage is returned (incrementing the reference counter of the instance).
|
||||
///
|
||||
/// If the string is longer than a given threshold, instead of storing the string, its SHA1 hash is
|
||||
/// stored (and the original string discarded). This allows an upper threshold on the length of a
|
||||
/// stored string, while still effectively guaranteeing uniqueness in the table.
|
||||
class AppExport StringHasher: public Base::Persistence, public Base::Handled
|
||||
{
|
||||
|
||||
TYPESYSTEM_HEADER_WITH_OVERRIDE();// NOLINT
|
||||
|
||||
public:
|
||||
StringHasher();
|
||||
~StringHasher() override;
|
||||
|
||||
StringHasher(const StringHasher&) = delete;
|
||||
StringHasher(StringHasher&&) noexcept = delete;
|
||||
StringHasher& operator=(StringHasher& other) = delete;
|
||||
StringHasher& operator=(StringHasher&& other) noexcept = delete;
|
||||
|
||||
unsigned int getMemSize() const override;
|
||||
void Save(Base::Writer& /*writer*/) const override;
|
||||
void Restore(Base::XMLReader& /*reader*/) override;
|
||||
void SaveDocFile(Base::Writer& /*writer*/) const override;
|
||||
void RestoreDocFile(Base::Reader& /*reader*/) override;
|
||||
void setPersistenceFileName(const char* name) const;
|
||||
const std::string& getPersistenceFileName() const;
|
||||
|
||||
/** Maps an arbitrary string to an integer
|
||||
*
|
||||
* @param text: input string.
|
||||
* @param len: length of the string: optional if the string is null-terminated.
|
||||
* @param hashable: whether hashing the string is permitted.
|
||||
* @return A shared pointer to the internally-stored StringID.
|
||||
*
|
||||
* Maps an arbitrary text string to a unique integer ID, returning a reference-counted shared
|
||||
* pointer to the StringID. This permits elimination of unused strings based on their reference
|
||||
* count. If a duplicate string is added, no additional copy is made, and a new reference to the
|
||||
* original storage is returned (incrementing the reference counter of the instance).
|
||||
*
|
||||
* If \c hashable is true and the string is longer than the threshold setting of this
|
||||
* StringHasher, only the SHA1 hash of the string is stored: the original content of the string
|
||||
* is discarded. If \c hashable is false, the string is copied and stored inside a StringID
|
||||
* instance.
|
||||
*
|
||||
* The purpose of this function is to provide a short form of a stable string identification.
|
||||
*/
|
||||
StringIDRef getID(const char* text, int len = -1, bool hashable = false);
|
||||
|
||||
/// Options for storing string data
|
||||
enum class Option
|
||||
{
|
||||
/// No option is set
|
||||
None = 0,
|
||||
|
||||
/// The input data is binary
|
||||
Binary = 1 << 0,
|
||||
|
||||
/// Hashing is permitted for this input data. If the data length is longer than the
|
||||
/// threshold setting of the StringHasher, it will be sha1 hashed before storing, and the
|
||||
/// original content of the string is discarded.
|
||||
Hashable = 1 << 1,
|
||||
|
||||
/// Do not copy the data: assume it is constant and exists for the lifetime of this hasher.
|
||||
/// If this option is not set, the data will be copied before storing.
|
||||
NoCopy = 1 << 2,
|
||||
};
|
||||
using Options = Base::Flags<Option>;
|
||||
|
||||
/** Map text or binary data to an integer
|
||||
*
|
||||
* @param data: input data.
|
||||
* @param options: options describing how to store the data.
|
||||
* @return A shared pointer to the internally stored StringID.
|
||||
*
|
||||
* \sa getID (const char*, int, bool);
|
||||
*/
|
||||
StringIDRef getID(const QByteArray& data, Options options = Option::Hashable);
|
||||
|
||||
/** Map geometry element name to an integer */
|
||||
StringIDRef getID(const Data::MappedName& name, const QVector<StringIDRef>& sids);
|
||||
|
||||
/** Obtain the reference counted StringID object from numerical id
|
||||
*
|
||||
* @param id: string ID
|
||||
* @param index: optional index of the string ID
|
||||
* @return Return a shared pointer to the internally stored StringID.
|
||||
*
|
||||
* This function exists because the stored string may be one way hashed,
|
||||
* and the original text is not persistent. The caller uses this function to
|
||||
* retrieve the reference-counted ID object after restore.
|
||||
*/
|
||||
StringIDRef getID(long id, int index = 0) const;
|
||||
|
||||
/** Obtain the reference counted StringID object from numerical id and index
|
||||
*
|
||||
* @param id: string ID with index
|
||||
* @return Return a shared pointer to the internally stored StringID.
|
||||
*/
|
||||
StringIDRef getID(const StringID::IndexID& id) const
|
||||
{
|
||||
return getID(id.id, id.index);
|
||||
}
|
||||
|
||||
std::map<long, StringIDRef> getIDMap() const;
|
||||
|
||||
/// Clear all string hashes
|
||||
void clear();
|
||||
|
||||
/// Size of the hash table
|
||||
size_t size() const;
|
||||
|
||||
/// Return the number of hashes that are used by others
|
||||
size_t count() const;
|
||||
|
||||
PyObject* getPyObject() override;
|
||||
|
||||
/** Enable/disable saving all string ID
|
||||
*
|
||||
* If saveAll is true, then compact() does nothing even when called explicitly. Setting
|
||||
* saveAll to false causes compact() to be run immediately.
|
||||
*/
|
||||
void setSaveAll(bool enable);
|
||||
bool getSaveAll() const;
|
||||
|
||||
/** Set threshold of string hashing
|
||||
*
|
||||
* For hashable strings that are longer than this threshold, the string will
|
||||
* be replaced by its sha1 hash.
|
||||
*/
|
||||
void setThreshold(int threshold);
|
||||
int getThreshold() const;
|
||||
|
||||
/** Clear internal marks
|
||||
*
|
||||
* The internal marks on internally stored StringID instances are used to
|
||||
* check if the StringID is used.
|
||||
*/
|
||||
void clearMarks() const;
|
||||
|
||||
/// Compact string storage by eliminating unused strings from the table.
|
||||
void compact();
|
||||
|
||||
class HashMap;
|
||||
friend class StringID;
|
||||
|
||||
protected:
|
||||
StringID* insert(const StringIDRef& sid);
|
||||
long lastID() const;
|
||||
void saveStream(std::ostream& stream) const;
|
||||
void restoreStream(std::istream& stream, std::size_t count);
|
||||
void restoreStreamNew(std::istream& stream, std::size_t count);
|
||||
|
||||
private:
|
||||
std::unique_ptr<HashMap> _hashes;///< Bidirectional map of StringID and its index (a long int).
|
||||
mutable std::string _filename;
|
||||
};
|
||||
}// namespace App
|
||||
|
||||
ENABLE_BITMASK_OPERATORS(App::StringID::Flag)
|
||||
ENABLE_BITMASK_OPERATORS(App::StringHasher::Option)
|
||||
|
||||
namespace App
|
||||
{
|
||||
inline bool StringID::isBinary() const
|
||||
{
|
||||
return _flags.testFlag(Flag::Binary);
|
||||
}
|
||||
inline bool StringID::isHashed() const
|
||||
{
|
||||
return _flags.testFlag(Flag::Hashed);
|
||||
}
|
||||
inline bool StringID::isPostfixed() const
|
||||
{
|
||||
return _flags.testFlag(Flag::Postfixed);
|
||||
}
|
||||
inline bool StringID::isPostfixEncoded() const
|
||||
{
|
||||
return _flags.testFlag(Flag::PostfixEncoded);
|
||||
}
|
||||
inline bool StringID::isIndexed() const
|
||||
{
|
||||
return _flags.testFlag(Flag::Indexed);
|
||||
}
|
||||
inline bool StringID::isPrefixID() const
|
||||
{
|
||||
return _flags.testFlag(Flag::PrefixID);
|
||||
}
|
||||
inline bool StringID::isPrefixIDIndex() const
|
||||
{
|
||||
return _flags.testFlag(Flag::PrefixIDIndex);
|
||||
}
|
||||
inline bool StringID::isMarked() const
|
||||
{
|
||||
return _flags.testFlag(Flag::Marked);
|
||||
}
|
||||
inline bool StringID::isPersistent() const
|
||||
{
|
||||
return _flags.testFlag(Flag::Persistent);
|
||||
}
|
||||
inline void StringID::setPersistent(bool enable)
|
||||
{
|
||||
_flags.setFlag(Flag::Persistent, enable);
|
||||
}
|
||||
}// namespace App
|
||||
|
||||
#endif// APP_STRING_ID_H
|
||||
71
src/App/StringHasherPy.xml
Normal file
71
src/App/StringHasherPy.xml
Normal file
@@ -0,0 +1,71 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<GenerateModel xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:noNamespaceSchemaLocation="generateMetaModel_Module.xsd">
|
||||
<PythonExport
|
||||
Father="BaseClassPy"
|
||||
Name="StringHasherPy"
|
||||
Twin="StringHasher"
|
||||
TwinPointer="StringHasher"
|
||||
Include="App/StringHasher.h"
|
||||
FatherInclude="Base/BaseClassPy.h"
|
||||
Namespace="App"
|
||||
FatherNamespace="Base"
|
||||
Constructor="true"
|
||||
Reference="true">
|
||||
<Documentation>
|
||||
<Author Licence="LGPL" Name="Zheng, Lei" EMail="realthunder.dev@gmail.com" />
|
||||
<DeveloperDocu>This is the StringHasher class</DeveloperDocu>
|
||||
<UserDocu>This is the StringHasher class</UserDocu>
|
||||
</Documentation>
|
||||
<Methode Name="getID">
|
||||
<Documentation>
|
||||
<UserDocu>
|
||||
getID(txt|id, base64=False) -> StringID
|
||||
|
||||
If the input is text, return a StringID object that is unique within this hasher. This
|
||||
StringID object is reference counted. The hasher may only save hash ID's that are used.
|
||||
|
||||
If the input is an integer, then the hasher will try to find the StringID object stored
|
||||
with the same integer value.
|
||||
|
||||
base64: indicate if the input 'txt' is base64 encoded binary data
|
||||
</UserDocu>
|
||||
</Documentation>
|
||||
</Methode>
|
||||
<Methode Name="isSame" Const="true">
|
||||
<Documentation>
|
||||
<UserDocu>Check if two hashers are the same</UserDocu>
|
||||
</Documentation>
|
||||
</Methode>
|
||||
<Attribute Name="Count" ReadOnly="true">
|
||||
<Documentation>
|
||||
<UserDocu>Return count of used hashes</UserDocu>
|
||||
</Documentation>
|
||||
<Parameter Name="Count" Type="Int" />
|
||||
</Attribute>
|
||||
<Attribute Name="Size" ReadOnly="true">
|
||||
<Documentation>
|
||||
<UserDocu>Return the size of the hashes</UserDocu>
|
||||
</Documentation>
|
||||
<Parameter Name="Size" Type="Int"/>
|
||||
</Attribute>
|
||||
<Attribute Name="SaveAll">
|
||||
<Documentation>
|
||||
<UserDocu>Whether to save all string hashes regardless of their use count</UserDocu>
|
||||
</Documentation>
|
||||
<Parameter Name="SaveAll" Type="Boolean"/>
|
||||
</Attribute>
|
||||
<Attribute Name="Threshold">
|
||||
<Documentation>
|
||||
<UserDocu>Data whose length exceeds this threshold will be hashed before storing</UserDocu>
|
||||
</Documentation>
|
||||
<Parameter Name="Threshold" Type="Int"/>
|
||||
</Attribute>
|
||||
<Attribute Name="Table" ReadOnly="true">
|
||||
<Documentation>
|
||||
<UserDocu>Return the entire string table as Int->String dictionary</UserDocu>
|
||||
</Documentation>
|
||||
<Parameter Name="Table" Type="Dict"/>
|
||||
</Attribute>
|
||||
</PythonExport>
|
||||
</GenerateModel>
|
||||
|
||||
148
src/App/StringHasherPyImp.cpp
Normal file
148
src/App/StringHasherPyImp.cpp
Normal file
@@ -0,0 +1,148 @@
|
||||
/****************************************************************************
|
||||
* Copyright (c) 2018 Zheng Lei (realthunder) <realthunder.dev@gmail.com> *
|
||||
* *
|
||||
* This file is part of the FreeCAD CAx development system. *
|
||||
* *
|
||||
* This library is free software; you can redistribute it and/or *
|
||||
* modify it under the terms of the GNU Library General Public *
|
||||
* License as published by the Free Software Foundation; either *
|
||||
* version 2 of the License, or (at your option) any later version. *
|
||||
* *
|
||||
* This library is distributed in the hope that it will be useful, *
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
|
||||
* GNU Library General Public License for more details. *
|
||||
* *
|
||||
* You should have received a copy of the GNU Library General Public *
|
||||
* License along with this library; see the file COPYING.LIB. If not, *
|
||||
* write to the Free Software Foundation, Inc., 59 Temple Place, *
|
||||
* Suite 330, Boston, MA 02111-1307, USA *
|
||||
* *
|
||||
****************************************************************************/
|
||||
|
||||
#include "PreCompiled.h"
|
||||
|
||||
#include "StringHasher.h"
|
||||
|
||||
#include "StringHasherPy.h"
|
||||
#include "StringHasherPy.cpp"
|
||||
|
||||
using namespace App;
|
||||
|
||||
// returns a string which represent the object e.g. when printed in python
|
||||
std::string StringHasherPy::representation(void) const
|
||||
{
|
||||
std::ostringstream str;
|
||||
str << "<StringHasher at " << getStringHasherPtr() << ">";
|
||||
return str.str();
|
||||
}
|
||||
|
||||
PyObject *StringHasherPy::PyMake(struct _typeobject *, PyObject *, PyObject *) // Python wrapper
|
||||
{
|
||||
return new StringHasherPy(new StringHasher);
|
||||
}
|
||||
|
||||
// constructor method
|
||||
int StringHasherPy::PyInit(PyObject* , PyObject* )
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
/// Python: isSame(other) -> bool
///
/// Returns True when `other` wraps the very same StringHasher object as this one.
/// `other` must be a StringHasher; otherwise the exception set by argument
/// parsing is propagated to the caller.
PyObject* StringHasherPy::isSame(PyObject *args)
{
    PyObject *other = nullptr;
    if (!PyArg_ParseTuple(args, "O!", &StringHasherPy::Type, &other)) {
        // PyArg_ParseTuple has already set an exception; a non-null result
        // must not be returned while an exception is pending.
        return nullptr;
    }
    auto otherHasher = static_cast<StringHasherPy*>(other)->getStringHasherPtr();
    return Py::new_reference_to(Py::Boolean(getStringHasherPtr() == otherHasher));
}
|
||||
|
||||
/// Python: getID(id, index=0) -> StringID | None
///         getID(txt, base64=False) -> StringID
///
/// Integer form: looks up an existing StringID by its numeric ID (must be > 0)
/// and returns None when the hasher has no such ID.
/// Text form: maps the text (or, when base64 is true, the base64-decoded binary
/// data) to a StringID of this hasher.
PyObject* StringHasherPy::getID(PyObject *args)
{
    long id = -1;
    int index = 0;
    PyObject *value = nullptr;
    PyObject *base64 = Py_False;

    // Integer form. Only trust 'id' when the whole parse succeeded: a failed
    // parse may still have written the first argument.
    if (PyArg_ParseTuple(args, "l|i", &id, &index)) {
        if (id <= 0) {
            PyErr_SetString(PyExc_ValueError, "Id must be positive integer");
            return nullptr;
        }
        PY_TRY {
            auto sid = getStringHasherPtr()->getID(id, index);
            if (!sid) {
                Py_Return;
            }
            return sid.getPyObject();
        } PY_CATCH;
    }

    // Text form.
    PyErr_Clear();
    if (!PyArg_ParseTuple(args, "O|O", &value, &base64)) {
        return nullptr; // exception already set by PyArg_ParseTuple
    }

    std::string txt;
#if PY_MAJOR_VERSION >= 3
    if (PyUnicode_Check(value)) {
        const char* utf8 = PyUnicode_AsUTF8(value);
        if (!utf8) {
            return nullptr; // conversion failed, exception already set
        }
        txt = utf8;
    }
#else
    if (PyUnicode_Check(value)) {
        PyObject* unicode = PyUnicode_AsLatin1String(value);
        if (!unicode) {
            return nullptr; // conversion failed, exception already set
        }
        txt = PyString_AsString(unicode);
        Py_DECREF(unicode);
    }
    else if (PyString_Check(value)) {
        txt = PyString_AsString(value);
    }
#endif
    else {
        throw Py::TypeError("expect argument of type string");
    }

    PY_TRY {
        StringIDRef sid;
        if (PyObject_IsTrue(base64)) {
            // The decoded payload is arbitrary bytes, so store it flagged as binary.
            QByteArray data = QByteArray::fromBase64(
                QByteArray::fromRawData(txt.c_str(), static_cast<int>(txt.size())));
            sid = getStringHasherPtr()->getID(data, StringHasher::Option::Binary);
        }
        else {
            sid = getStringHasherPtr()->getID(txt.c_str(), static_cast<int>(txt.size()));
        }
        return sid.getPyObject();
    } PY_CATCH;
}
|
||||
|
||||
// 'Count' attribute: the value of StringHasher::count() as a Python integer.
Py::Int StringHasherPy::getCount() const
{
    const auto used = static_cast<long>(getStringHasherPtr()->count());
    return Py::Int(used);
}
|
||||
|
||||
// 'Size' attribute: the value of StringHasher::size() as a Python integer.
Py::Int StringHasherPy::getSize() const
{
    const auto entries = static_cast<long>(getStringHasherPtr()->size());
    return Py::Int(entries);
}
|
||||
|
||||
Py::Boolean StringHasherPy::getSaveAll(void) const {
|
||||
return Py::Boolean(getStringHasherPtr()->getSaveAll());
|
||||
}
|
||||
|
||||
void StringHasherPy::setSaveAll(Py::Boolean value) {
|
||||
getStringHasherPtr()->setSaveAll(value);
|
||||
}
|
||||
|
||||
// 'Threshold' attribute: the hasher's current hashing threshold as a Python integer.
Py::Int StringHasherPy::getThreshold() const
{
    const auto threshold = static_cast<long>(getStringHasherPtr()->getThreshold());
    return Py::Int(threshold);
}
|
||||
|
||||
void StringHasherPy::setThreshold(Py::Int value) {
|
||||
getStringHasherPtr()->setThreshold(value);
|
||||
}
|
||||
|
||||
// 'Table' attribute: a dictionary mapping each numeric ID to the text form of its data.
Py::Dict StringHasherPy::getTable() const
{
    Py::Dict table;
    const auto idMap = getStringHasherPtr()->getIDMap();
    for (const auto& entry : idMap) {
        table.setItem(Py::Int(entry.first), Py::String(entry.second.dataToText()));
    }
    return table;
}
|
||||
|
||||
PyObject *StringHasherPy::getCustomAttributes(const char* /*attr*/) const
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
int StringHasherPy::setCustomAttributes(const char* /*attr*/, PyObject* /*obj*/)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
65
src/App/StringIDPy.xml
Normal file
65
src/App/StringIDPy.xml
Normal file
@@ -0,0 +1,65 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<GenerateModel xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:noNamespaceSchemaLocation="generateMetaModel_Module.xsd">
|
||||
<PythonExport
|
||||
Father="BaseClassPy"
|
||||
Name="StringIDPy"
|
||||
Twin="StringID"
|
||||
TwinPointer="StringID"
|
||||
Include="App/StringHasher.h"
|
||||
FatherInclude="Base/BaseClassPy.h"
|
||||
Namespace="App"
|
||||
FatherNamespace="Base"
|
||||
Reference="true">
|
||||
<Documentation>
|
||||
<Author Licence="LGPL" Name="Zheng, Lei" EMail="realthunder.dev@gmail.com" />
|
||||
<DeveloperDocu>This is the StringID class</DeveloperDocu>
|
||||
<UserDocu>This is the StringID class</UserDocu>
|
||||
</Documentation>
|
||||
<Methode Name="isSame" Const="true">
|
||||
<Documentation>
|
||||
<UserDocu>Check if two StringIDs are the same</UserDocu>
|
||||
</Documentation>
|
||||
</Methode>
|
||||
<Attribute Name="Value" ReadOnly="true">
|
||||
<Documentation>
|
||||
<UserDocu>Return the integer value of this ID</UserDocu>
|
||||
</Documentation>
|
||||
<Parameter Name="Value" Type="Int"/>
|
||||
</Attribute>
|
||||
<Attribute Name="Related" ReadOnly="true">
|
||||
<Documentation>
|
||||
<UserDocu>Return the related string IDs</UserDocu>
|
||||
</Documentation>
|
||||
<Parameter Name="Related" Type="List"/>
|
||||
</Attribute>
|
||||
<Attribute Name="Data" ReadOnly="true">
|
||||
<Documentation>
|
||||
<UserDocu>Return the data associated with this ID</UserDocu>
|
||||
</Documentation>
|
||||
<Parameter Name="Data" Type="String"/>
|
||||
</Attribute>
|
||||
<Attribute Name="IsBinary" ReadOnly="true">
|
||||
<Documentation>
|
||||
<UserDocu>Check if the data is binary</UserDocu>
|
||||
</Documentation>
|
||||
<Parameter Name="IsBinary" Type="Boolean"/>
|
||||
</Attribute>
|
||||
<Attribute Name="IsHashed" ReadOnly="true">
|
||||
<Documentation>
|
||||
<UserDocu>Check if the data is a hash; if so, 'Data' returns a base64 encoded string of the raw hash</UserDocu>
|
||||
</Documentation>
|
||||
<Parameter Name="IsHashed" Type="Boolean"/>
|
||||
</Attribute>
|
||||
<Attribute Name="Index" ReadOnly="false">
|
||||
<Documentation>
|
||||
<UserDocu>Geometry index. Only meaningful for geometry element name</UserDocu>
|
||||
</Documentation>
|
||||
<Parameter Name="Index" Type="Int"/>
|
||||
</Attribute>
|
||||
<ClassDeclarations>private:
|
||||
friend class StringID;
|
||||
int _index = 0;
|
||||
</ClassDeclarations>
|
||||
</PythonExport>
|
||||
</GenerateModel>
|
||||
|
||||
90
src/App/StringIDPyImp.cpp
Normal file
90
src/App/StringIDPyImp.cpp
Normal file
@@ -0,0 +1,90 @@
|
||||
/***************************************************************************
|
||||
* Copyright (c) 2018 Zheng Lei (realthunder) <realthunder.dev@gmail.com> *
|
||||
* *
|
||||
* This file is part of the FreeCAD CAx development system. *
|
||||
* *
|
||||
* This library is free software; you can redistribute it and/or *
|
||||
* modify it under the terms of the GNU Library General Public *
|
||||
* License as published by the Free Software Foundation; either *
|
||||
* version 2 of the License, or (at your option) any later version. *
|
||||
* *
|
||||
* This library is distributed in the hope that it will be useful, *
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
|
||||
* GNU Library General Public License for more details. *
|
||||
* *
|
||||
* You should have received a copy of the GNU Library General Public *
|
||||
* License along with this library; see the file COPYING.LIB. If not, *
|
||||
* write to the Free Software Foundation, Inc., 59 Temple Place, *
|
||||
* Suite 330, Boston, MA 02111-1307, USA *
|
||||
* *
|
||||
****************************************************************************/
|
||||
|
||||
#include "PreCompiled.h"
|
||||
|
||||
#include "StringHasher.h"
|
||||
|
||||
#include "StringIDPy.h"
|
||||
#include "StringIDPy.cpp"
|
||||
|
||||
using namespace App;
|
||||
|
||||
// returns a string which represent the object e.g. when printed in python
|
||||
std::string StringIDPy::representation() const
|
||||
{
|
||||
return getStringIDPtr()->toString(_index);
|
||||
}
|
||||
|
||||
/// Python method isSame(other).
///
/// Returns True when `other` is a StringIDPy that wraps the very same StringID
/// pointer and carries the same index as this object; otherwise False.
/// Arguments that cannot be parsed as a single StringIDPy also yield False.
///
/// @param args Python argument tuple.
/// @return New reference to a Python boolean.
PyObject* StringIDPy::isSame(PyObject *args)
{
    PyObject *other = nullptr;
    if (PyArg_ParseTuple(args, "O!", &StringIDPy::Type, &other) == 0) {
        // PyArg_ParseTuple() leaves an exception pending when it fails. Returning a
        // non-null result while an error is set makes the interpreter raise
        // SystemError, so drop the error before answering "not the same".
        PyErr_Clear();
        return Py::new_reference_to(Py::False());
    }

    auto *otherPy = static_cast<StringIDPy*>(other);
    const bool same = otherPy->getStringIDPtr() == this->getStringIDPtr()
        && otherPy->_index == this->_index;
    return Py::new_reference_to(Py::Boolean(same));
}
|
||||
|
||||
/// Getter for the Python attribute `Value`: the result of value() on the wrapped object.
Py::Int StringIDPy::getValue() const
{
    auto* stringID = getStringIDPtr();
    return Py::Int(stringID->value());
}
|
||||
|
||||
/// Getter for the Python attribute `Related`.
/// Builds a list holding value() of every entry returned by relatedIDs().
Py::List StringIDPy::getRelated() const
{
    Py::List related;
    for (const auto &relatedID : getStringIDPtr()->relatedIDs()) {
        related.append(Py::Long(relatedID.value()));
    }
    return related;
}
|
||||
|
||||
/// Getter for the Python attribute `Data`: dataToText() of the wrapped object for this
/// wrapper's index.
Py::String StringIDPy::getData() const
{
    return Py::String(getStringIDPtr()->dataToText(this->_index));
}
|
||||
|
||||
/// Getter for the Python attribute `IsBinary`: forwards isBinary() of the wrapped object.
Py::Boolean StringIDPy::getIsBinary() const
{
    return Py::Boolean{getStringIDPtr()->isBinary()};
}
|
||||
|
||||
/// Getter for the Python attribute `IsHashed`: forwards isHashed() of the wrapped object.
Py::Boolean StringIDPy::getIsHashed() const
{
    return Py::Boolean{getStringIDPtr()->isHashed()};
}
|
||||
|
||||
/// Getter for the Python attribute `Index`: the index stored on this wrapper.
Py::Int StringIDPy::getIndex() const
{
    return Py::Int(_index);
}
|
||||
|
||||
/// Setter for the Python attribute `Index`: stores the given value on this wrapper.
/// Only the wrapper's own `_index` is written; the wrapped object is not touched.
void StringIDPy::setIndex(Py::Int index)
{
    _index = index;
}
|
||||
|
||||
/// Custom attribute lookup hook. No custom attributes are provided: the name is ignored
/// and nullptr is always returned.
/// NOTE(review): presumably nullptr lets the generated wrapper fall back to its default
/// attribute lookup -- confirm against the generated StringIDPy code.
PyObject *StringIDPy::getCustomAttributes(const char* /*attr*/) const
{
    return nullptr;
}
|
||||
|
||||
/// Custom attribute assignment hook. Nothing is handled here: both arguments are ignored
/// and 0 is always returned.
/// NOTE(review): presumably 0 means "not handled" to the generated wrapper -- confirm.
int StringIDPy::setCustomAttributes(const char* /*attr*/, PyObject* /*obj*/)
{
    return 0;
}
|
||||
@@ -123,6 +123,9 @@ public:
|
||||
using u = typename std::underlying_type<Enum>::type;
|
||||
return static_cast<u>(i) == static_cast<u>(f.i);
|
||||
}
|
||||
typename std::underlying_type<Enum>::type toUnderlyingType() const {
|
||||
return static_cast<typename std::underlying_type<Enum>::type>(i);
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
|
||||
@@ -24,6 +24,7 @@
|
||||
#include "PreCompiled.h"
|
||||
|
||||
#ifndef _PreComp_
|
||||
#include <memory>
|
||||
# include <xercesc/sax2/XMLReaderFactory.hpp>
|
||||
#endif
|
||||
|
||||
@@ -42,6 +43,7 @@
|
||||
#include <zipios++/zipios-config.h>
|
||||
#endif
|
||||
#include <zipios++/zipinputstream.h>
|
||||
#include <boost/iostreams/filtering_stream.hpp>
|
||||
|
||||
|
||||
XERCES_CPP_NAMESPACE_USE
|
||||
@@ -283,6 +285,85 @@ void Base::XMLReader::readCharacters()
|
||||
{
|
||||
}
|
||||
|
||||
/// Device read function used by the character stream (see the `category` alias in the
/// class declaration).
///
/// Copies buffered characters from `Characters`, starting at `CharacterOffset`, into `s`.
/// While more bytes are requested and the last parser event was character data, the
/// parser is advanced with read() to obtain more.
///
/// @param s Destination buffer.
/// @param n Maximum number of bytes to copy.
/// @return Number of bytes copied, or -1 if `CharacterOffset` is negative on entry.
std::streamsize Base::XMLReader::read(char_type* s, std::streamsize n)
{
    if (CharacterOffset < 0) {
        return -1;
    }

    char_type* const begin = s;
    while (true) {
        const std::streamsize available =
            static_cast<std::streamsize>(Characters.size()) - CharacterOffset;
        const std::streamsize chunk = (n < available) ? n : available;

        std::memcpy(s, Characters.c_str() + CharacterOffset, chunk);
        s += chunk;
        n -= chunk;
        CharacterOffset += chunk;

        if (n == 0) {
            break;
        }

        if (ReadType != Chars) {
            // The last event was not character data: mark the stream as finished so
            // the next call returns -1.
            CharacterOffset = -1;
            break;
        }
        read();
    }

    return s - begin;
}
|
||||
|
||||
void Base::XMLReader::endCharStream()
|
||||
{
|
||||
CharacterOffset = -1;
|
||||
CharStream.reset();
|
||||
}
|
||||
|
||||
std::istream& Base::XMLReader::charStream()
|
||||
{
|
||||
if (!CharStream) {
|
||||
throw Base::XMLParseException("no current character stream");
|
||||
}
|
||||
return *CharStream;
|
||||
}
|
||||
|
||||
std::istream& Base::XMLReader::beginCharStream()
|
||||
{
|
||||
if (CharStream) {
|
||||
throw Base::XMLParseException("recursive character stream");
|
||||
}
|
||||
|
||||
// TODO: An XML element can actually contain a mix of child elements and
|
||||
// characters. So we should not actually demand 'StartElement' here. But
|
||||
// with the current implementation of character stream, we cannot track
|
||||
// child elements and character content at the same time.
|
||||
if (ReadType == StartElement) {
|
||||
CharacterOffset = 0;
|
||||
read();
|
||||
}
|
||||
else if (ReadType == StartEndElement) {
|
||||
// If we are currently at a self-closing element, just leave the offset
|
||||
// as negative and do not read any characters. This will result in an
|
||||
// empty input stream for the caller.
|
||||
CharacterOffset = -1;
|
||||
}
|
||||
else {
|
||||
throw Base::XMLParseException("invalid state while reading character stream");
|
||||
}
|
||||
|
||||
CharStream = std::make_unique<boost::iostreams::filtering_istream>();
|
||||
auto* filteringStream = dynamic_cast<boost::iostreams::filtering_istream*>(CharStream.get());
|
||||
filteringStream->push(boost::ref(*this));
|
||||
return *CharStream;
|
||||
}
|
||||
|
||||
void Base::XMLReader::readBinFile(const char* filename)
|
||||
{
|
||||
Base::FileInfo fi(filename);
|
||||
|
||||
@@ -33,6 +33,8 @@
|
||||
#include <xercesc/sax2/Attributes.hpp>
|
||||
#include <xercesc/sax2/DefaultHandler.hpp>
|
||||
|
||||
#include <boost/iostreams/concepts.hpp>
|
||||
|
||||
#include "FileInfo.h"
|
||||
|
||||
|
||||
@@ -127,6 +129,13 @@ public:
|
||||
XMLReader(const char* FileName, std::istream&);
|
||||
~XMLReader() override;
|
||||
|
||||
/** @name boost iostream device interface */
|
||||
//@{
|
||||
using category = boost::iostreams::source_tag;
|
||||
using char_type = char;
|
||||
std::streamsize read(char_type* s, std::streamsize n);
|
||||
//@}
|
||||
|
||||
/// Return the reader's `_valid` flag.
bool isValid() const
{
    return _valid;
}
|
||||
/// Return the reader's `_verbose` flag.
bool isVerbose() const
{
    return _verbose;
}
|
||||
/// Set the reader's `_verbose` flag.
void setVerbose(bool on)
{
    _verbose = on;
}
|
||||
@@ -157,6 +166,20 @@ public:
|
||||
void readEndElement(const char* ElementName=nullptr, int level=-1);
|
||||
/// read until characters are found
|
||||
void readCharacters();
|
||||
|
||||
/** Obtain an input stream for reading characters
|
||||
*
|
||||
* @return An input stream for reading characters. The stream will be
|
||||
* automatically destroyed when you call readElement() or readEndElement(), or
|
||||
* you can end it explicitly with endCharStream().
|
||||
*/
|
||||
std::istream &beginCharStream();
|
||||
/// Manually end the current character stream
|
||||
void endCharStream();
|
||||
/// Obtain the current character stream
|
||||
std::istream &charStream();
|
||||
//@}
|
||||
|
||||
/// read binary file
|
||||
void readBinFile(const char*);
|
||||
//@}
|
||||
@@ -259,6 +282,7 @@ protected:
|
||||
std::string LocalName;
|
||||
std::string Characters;
|
||||
unsigned int CharacterCount;
|
||||
std::streamsize CharacterOffset{-1};
|
||||
|
||||
std::map<std::string,std::string> AttrMap;
|
||||
using AttrMapType = std::map<std::string,std::string>;
|
||||
@@ -285,6 +309,8 @@ protected:
|
||||
std::vector<std::string> FileNames;
|
||||
|
||||
std::bitset<32> StatusBits;
|
||||
|
||||
std::unique_ptr<std::istream> CharStream;
|
||||
};
|
||||
|
||||
class BaseExport Reader : public std::istream
|
||||
|
||||
@@ -25,6 +25,7 @@
|
||||
|
||||
#include <limits>
|
||||
#include <locale>
|
||||
#include <iomanip>
|
||||
|
||||
#include "Writer.h"
|
||||
#include "Base64.h"
|
||||
@@ -34,11 +35,43 @@
|
||||
#include "Stream.h"
|
||||
#include "Tools.h"
|
||||
|
||||
#include <boost/iostreams/filtering_stream.hpp>
|
||||
#include <memory>
|
||||
|
||||
using namespace Base;
|
||||
using namespace std;
|
||||
using namespace zipios;
|
||||
|
||||
// boost iostream filter to escape ']]>' in text file saved into CDATA section.
|
||||
// It does not check if the character is valid utf8 or not.
|
||||
struct cdata_filter {
|
||||
|
||||
typedef char char_type;
|
||||
typedef boost::iostreams::output_filter_tag category;
|
||||
|
||||
template<typename Device>
|
||||
inline bool put(Device& dev, char c) {
|
||||
switch(state) {
|
||||
case 0:
|
||||
case 1:
|
||||
if(c == ']')
|
||||
++state;
|
||||
else
|
||||
state = 0;
|
||||
break;
|
||||
case 2:
|
||||
if(c == '>') {
|
||||
static const char escape[] = "]]><![CDATA[";
|
||||
boost::iostreams::write(dev,escape,sizeof(escape)-1);
|
||||
}
|
||||
state = 0;
|
||||
break;
|
||||
}
|
||||
return boost::iostreams::put(dev,c);
|
||||
}
|
||||
|
||||
int state = 0;
|
||||
};
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Writer: Constructors and Destructor
|
||||
@@ -55,6 +88,44 @@ Writer::Writer()
|
||||
|
||||
Writer::~Writer() = default;
|
||||
|
||||
std::ostream& Writer::beginCharStream()
|
||||
{
|
||||
if (CharStream) {
|
||||
throw Base::RuntimeError("Writer::beginCharStream(): invalid state");
|
||||
}
|
||||
|
||||
Stream() << "<![CDATA[";
|
||||
CharStream = std::make_unique<boost::iostreams::filtering_ostream>();
|
||||
auto* filteredStream = dynamic_cast<boost::iostreams::filtering_ostream*>(CharStream.get());
|
||||
filteredStream->push(cdata_filter());
|
||||
filteredStream->push(Stream());
|
||||
*filteredStream << std::setprecision(std::numeric_limits<double>::digits10 + 1);
|
||||
return *CharStream;
|
||||
}
|
||||
|
||||
std::ostream& Writer::endCharStream()
|
||||
{
|
||||
if (CharStream) {
|
||||
CharStream.reset();
|
||||
Stream() << "]]>";
|
||||
}
|
||||
return Stream();
|
||||
}
|
||||
|
||||
std::ostream& Writer::charStream()
|
||||
{
|
||||
if (!CharStream) {
|
||||
throw Base::RuntimeError("Writer::endCharStream(): no current character stream");
|
||||
}
|
||||
return *CharStream;
|
||||
}
|
||||
|
||||
void Writer::insertText(const std::string& s)
|
||||
{
|
||||
beginCharStream() << s;
|
||||
endCharStream();
|
||||
}
|
||||
|
||||
void Writer::insertAsciiFile(const char* FileName)
|
||||
{
|
||||
Base::FileInfo fi(FileName);
|
||||
|
||||
@@ -72,6 +72,8 @@ public:
|
||||
void insertAsciiFile(const char* FileName);
|
||||
/// insert a binary file BASE64 coded as CDATA section in the XML file
|
||||
void insertBinFile(const char* FileName);
|
||||
/// insert text string as CDATA
|
||||
void insertText(const std::string &s);
|
||||
|
||||
/** @name additional file writing */
|
||||
//@{
|
||||
@@ -115,6 +117,23 @@ public:
|
||||
|
||||
virtual std::ostream &Stream()=0;
|
||||
|
||||
/** Create an output stream for storing character content
|
||||
* The input is assumed to be valid character data in
|
||||
* the current XML encoding, and will be enclosed inside
|
||||
* CDATA section. The stream will scan the input and
|
||||
* properly escape any CDATA ending inside.
|
||||
* @return Returns an output stream.
|
||||
*
|
||||
* You must call endCharStream() to end the current character stream.
|
||||
*/
|
||||
std::ostream &beginCharStream();
|
||||
/** End the current character output stream
|
||||
* @return Returns the normal writer stream for convenience
|
||||
*/
|
||||
std::ostream &endCharStream();
|
||||
/// Return the current character output stream
|
||||
std::ostream &charStream();
|
||||
|
||||
/// name for underlying file saves
|
||||
std::string ObjectName;
|
||||
|
||||
@@ -138,6 +157,8 @@ protected:
|
||||
private:
|
||||
Writer(const Writer&);
|
||||
Writer& operator=(const Writer&);
|
||||
|
||||
std::unique_ptr<std::ostream> CharStream;
|
||||
};
|
||||
|
||||
|
||||
|
||||
@@ -37,6 +37,7 @@ endif()
|
||||
add_executable(Tests_run)
|
||||
add_subdirectory(lib)
|
||||
add_subdirectory(src)
|
||||
target_include_directories(Tests_run PUBLIC ${Python3_INCLUDE_DIRS})
|
||||
target_link_libraries(Tests_run gtest_main ${Google_Tests_LIBS} FreeCADApp)
|
||||
|
||||
add_executable(Sketcher_tests_run)
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
target_sources(
|
||||
Tests_run
|
||||
PRIVATE
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/Application.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/Branding.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/Expression.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/ElementMap.cpp
|
||||
@@ -9,5 +10,5 @@ target_sources(
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/MappedElement.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/MappedName.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/Metadata.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/Application.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/StringHasher.cpp
|
||||
)
|
||||
|
||||
1575
tests/src/App/StringHasher.cpp
Normal file
1575
tests/src/App/StringHasher.cpp
Normal file
File diff suppressed because it is too large
Load Diff
30
tests/src/Base/Bitmask.cpp
Normal file
30
tests/src/Base/Bitmask.cpp
Normal file
@@ -0,0 +1,30 @@
|
||||
// SPDX-License-Identifier: LGPL-2.1-or-later
|
||||
|
||||
#include "gtest/gtest.h"
|
||||
|
||||
#include <Base/Bitmask.h>
|
||||
|
||||
/// Scoped enum without an explicit underlying type, used as the flag type in the tests.
enum class TestFlagEnum { Flag1, Flag2, Flag3 };
|
||||
|
||||
/// Fixture for the Base::Flags tests. No set-up or tear-down is needed.
class BitmaskTest: public ::testing::Test
{
};
|
||||
|
||||
/// toUnderlyingType() of a Flags over a plain `enum class` yields an int.
TEST_F(BitmaskTest, toUnderlyingType)
{
    // Arrange
    Base::Flags<TestFlagEnum> flags {TestFlagEnum::Flag1};

    // Act
    auto underlying = flags.toUnderlyingType();

    // Assert
    EXPECT_EQ(typeid(underlying), typeid(int));
}
|
||||
@@ -1,9 +1,12 @@
|
||||
target_sources(
|
||||
Tests_run
|
||||
PRIVATE
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/Bitmask.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/Matrix.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/Rotation.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/tst_Tools.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/Unit.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/Quantity.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/Reader.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/Rotation.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/Unit.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/Writer.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/tst_Tools.cpp
|
||||
)
|
||||
|
||||
212
tests/src/Base/Reader.cpp
Normal file
212
tests/src/Base/Reader.cpp
Normal file
@@ -0,0 +1,212 @@
|
||||
// SPDX-License-Identifier: LGPL-2.1-or-later
|
||||
|
||||
#include "gtest/gtest.h"
|
||||
|
||||
#include "Base/Exception.h"
|
||||
#include "Base/Reader.h"
|
||||
#include <array>
|
||||
#include <filesystem>
|
||||
#include <fmt/format.h>
|
||||
#include <fstream>
|
||||
|
||||
namespace fs = std::filesystem;
|
||||
|
||||
class ReaderTest: public ::testing::Test
|
||||
{
|
||||
protected:
|
||||
void SetUp() override
|
||||
{
|
||||
xercesc_3_2::XMLPlatformUtils::Initialize();
|
||||
_tempDir = fs::temp_directory_path();
|
||||
std::string filename = "unit_test_Reader.xml";
|
||||
_tempFile = _tempDir / filename;
|
||||
}
|
||||
|
||||
void TearDown() override
|
||||
{
|
||||
if (std::filesystem::exists(_tempFile)) {
|
||||
std::filesystem::remove(_tempFile);
|
||||
}
|
||||
}
|
||||
|
||||
void givenDataAsXMLStream(const std::string& data)
|
||||
{
|
||||
auto stringData =
|
||||
R"(<?xml version="1.0" encoding="UTF-8"?><document>)" + data + "</document>";
|
||||
std::istringstream stream(stringData);
|
||||
std::ofstream fileStream(_tempFile);
|
||||
fileStream.write(stringData.data(), static_cast<std::streamsize>(stringData.length()));
|
||||
fileStream.close();
|
||||
std::ifstream inputStream(_tempFile);
|
||||
_reader = std::make_unique<Base::XMLReader>(_tempFile.string().c_str(), inputStream);
|
||||
}
|
||||
|
||||
Base::XMLReader* Reader()
|
||||
{
|
||||
return _reader.get();
|
||||
}
|
||||
|
||||
private:
|
||||
std::unique_ptr<Base::XMLReader> _reader;
|
||||
fs::path _tempDir;
|
||||
fs::path _tempFile;
|
||||
};
|
||||
|
||||
/// Opening a character stream on an element with text content yields a good stream.
TEST_F(ReaderTest, beginCharStreamNormal)
{
    // Arrange
    givenDataAsXMLStream("<data>Test ASCII data</data>");
    auto* reader = Reader();
    reader->readElement("data");

    // Act
    auto& charStream = reader->beginCharStream();

    // Assert
    EXPECT_TRUE(charStream.good());
}
|
||||
|
||||
/// Opening a character stream on a self-closing element is not an error, even though
/// there is no data to read.
TEST_F(ReaderTest, beginCharStreamOpenClose)
{
    // Arrange
    givenDataAsXMLStream("<data id='12345' />");
    auto* reader = Reader();
    reader->readElement("data");

    // Act
    auto& charStream = reader->beginCharStream();

    // Assert
    EXPECT_TRUE(charStream.good());
}
|
||||
|
||||
/// A second beginCharStream() while one is already open throws.
TEST_F(ReaderTest, beginCharStreamAlreadyBegun)
{
    // Arrange
    givenDataAsXMLStream("<data>Test ASCII data</data>");
    auto* reader = Reader();
    reader->readElement("data");
    reader->beginCharStream();

    // Act & Assert
    EXPECT_THROW(reader->beginCharStream(), Base::XMLParseException);
}
|
||||
|
||||
/// charStream() returns a good stream once beginCharStream() has been called.
TEST_F(ReaderTest, charStreamGood)
{
    // Arrange
    givenDataAsXMLStream("<data>Test ASCII data</data>");
    auto* reader = Reader();
    reader->readElement("data");
    reader->beginCharStream();

    // Act
    auto& currentStream = reader->charStream();

    // Assert
    EXPECT_TRUE(currentStream.good());
}
|
||||
|
||||
/// charStream() throws when no character stream has been opened.
TEST_F(ReaderTest, charStreamBad)
{
    // Arrange
    givenDataAsXMLStream("<data>Test ASCII data</data>");
    auto* reader = Reader();
    reader->readElement("data");

    // Act & Assert
    EXPECT_THROW(reader->charStream(), Base::XMLParseException);
}
|
||||
|
||||
/// Ending an open character stream does not throw.
TEST_F(ReaderTest, endCharStreamGood)
{
    // Arrange
    givenDataAsXMLStream("<data>Test ASCII data</data>");
    auto* reader = Reader();
    reader->readElement("data");
    reader->beginCharStream();

    // Act & Assert
    reader->endCharStream();  // Does not throw
}
|
||||
|
||||
/// Ending a character stream that was never opened does not throw either.
TEST_F(ReaderTest, endCharStreamBad)
{
    // Arrange
    givenDataAsXMLStream("<data>Test ASCII data</data>");
    auto* reader = Reader();
    reader->readElement("data");
    // Do not open the stream...

    // Act & Assert
    reader->endCharStream();  // Does not throw, even with no open stream
}
|
||||
|
||||
/// Reading into a buffer larger than the content returns the whole content.
TEST_F(ReaderTest, readDataSmallerThanBuffer)
{
    // Arrange
    constexpr size_t bufferSize {20};
    std::string expectedData {"Test ASCII data"};
    givenDataAsXMLStream("<data>" + expectedData + "</data>");
    auto* reader = Reader();
    reader->readElement("data");
    reader->beginCharStream();
    std::array<char, bufferSize> buffer {};  // zero-filled, so the result is terminated

    // Act
    auto bytesRead = reader->read(buffer.data(), bufferSize);

    // Assert
    EXPECT_STREQ(expectedData.c_str(), buffer.data());
    EXPECT_EQ(expectedData.length(), bytesRead);
}
|
||||
|
||||
/// Reading into a buffer smaller than the content fills the buffer with the first bytes.
TEST_F(ReaderTest, readDataLargerThanBuffer)
{
    // Arrange
    constexpr size_t bufferSize {5};
    std::string expectedData {"Test ASCII data"};
    givenDataAsXMLStream("<data>" + expectedData + "</data>");
    auto* reader = Reader();
    reader->readElement("data");
    reader->beginCharStream();
    std::array<char, bufferSize> buffer {};

    // Act
    auto bytesRead = reader->read(buffer.data(), bufferSize);

    // Assert
    for (size_t pos = 0; pos < bufferSize; ++pos) {
        EXPECT_EQ(expectedData[pos], buffer.at(pos));
    }
    EXPECT_EQ(bufferSize, bytesRead);
}
|
||||
|
||||
/// A second read continues where the first one stopped.
TEST_F(ReaderTest, readDataLargerThanBufferSecondRead)
{
    // Arrange
    constexpr size_t bufferSize {5};
    std::string expectedData {"Test ASCII data"};
    givenDataAsXMLStream("<data>" + expectedData + "</data>");
    auto* reader = Reader();
    reader->readElement("data");
    reader->beginCharStream();
    std::array<char, bufferSize> buffer {};
    reader->read(buffer.data(), bufferSize);  // Read the first five bytes

    // Act
    auto bytesRead = reader->read(buffer.data(), bufferSize);  // Second five bytes

    // Assert
    for (size_t pos = 0; pos < bufferSize; ++pos) {
        EXPECT_EQ(expectedData[pos + bufferSize], buffer.at(pos));
    }
    EXPECT_EQ(bufferSize, bytesRead);
}
|
||||
|
||||
|
||||
/// read() reports -1 when beginCharStream() has not been called.
TEST_F(ReaderTest, readDataNotStarted)
{
    // Arrange
    constexpr size_t bufferSize {20};
    std::string expectedData {"Test ASCII data"};
    givenDataAsXMLStream("<data>" + expectedData + "</data>");
    auto* reader = Reader();
    reader->readElement("data");
    std::array<char, bufferSize> buffer {};

    // Act
    auto bytesRead = reader->read(buffer.data(), bufferSize);

    // Assert
    EXPECT_EQ(-1, bytesRead);  // Because we didn't call beginCharStream
}
|
||||
118
tests/src/Base/Writer.cpp
Normal file
118
tests/src/Base/Writer.cpp
Normal file
@@ -0,0 +1,118 @@
|
||||
// SPDX-License-Identifier: LGPL-2.1-or-later
|
||||
|
||||
#include "gtest/gtest.h"
|
||||
|
||||
#include "Base/Exception.h"
|
||||
#include "Base/Writer.h"
|
||||
|
||||
// Writer is designed to be a base class, so for testing we actually instantiate a StringWriter,
|
||||
// which is derived from it
|
||||
|
||||
class WriterTest : public ::testing::Test {
|
||||
protected:
|
||||
//void SetUp() override {}
|
||||
|
||||
// void TearDown() override {}
|
||||
protected:
|
||||
Base::StringWriter _writer;
|
||||
};
|
||||
|
||||
/// Plain text is wrapped in a single CDATA section.
TEST_F(WriterTest, insertTextSimple)
{
    // Arrange
    const std::string text {"Simple ASCII data"};
    const std::string expected {"<![CDATA[" + text + "]]>"};

    // Act
    _writer.insertText(text);

    // Assert
    EXPECT_EQ(expected, _writer.getString());
}
|
||||
|
||||
/// If the data happens to actually include an XML CDATA close marker, that needs to be "escaped" --
|
||||
/// this is done by breaking it up into two separate CDATA sections, splitting apart the marker.
|
||||
TEST_F(WriterTest, insertTextNeedsEscape)
|
||||
{
|
||||
// Arrange
|
||||
std::string testDataA {"ASCII data with a close marker in it, like so: ]]"};
|
||||
std::string testDataB {"> "};
|
||||
std::string expectedResult {"<![CDATA[" + testDataA + "]]><![CDATA[" + testDataB + "]]>"};
|
||||
|
||||
// Act
|
||||
_writer.insertText(testDataA + testDataB);
|
||||
|
||||
// Assert
|
||||
EXPECT_EQ(expectedResult, _writer.getString());
|
||||
}
|
||||
|
||||
/// Control characters and characters outside the ASCII range pass through unchanged.
TEST_F(WriterTest, insertNonAsciiData)
{
    // Arrange
    // \U takes eight hex digits: the previous "\u0001F450" was parsed as U+0001 followed
    // by the literal text "F450" instead of the single code point U+1F450.
    std::string testData {"\x01\x02\x03\x04\U0001F450😀"};
    std::string expectedResult {"<![CDATA[" + testData + "]]>"};

    // Act
    _writer.insertText(testData);

    // Assert
    EXPECT_EQ(expectedResult, _writer.getString());
}
|
||||
|
||||
/// beginCharStream() hands back a stream in a good state.
TEST_F(WriterTest, beginCharStream)
{
    // Arrange & Act
    auto& charStream = _writer.beginCharStream();

    // Assert
    EXPECT_TRUE(charStream.good());
}
|
||||
|
||||
/// Opening a second character stream while one is open throws.
TEST_F(WriterTest, beginCharStreamTwice)
{
    // Arrange
    _writer.beginCharStream();

    // Act & Assert
    EXPECT_THROW(_writer.beginCharStream(), Base::RuntimeError);
}
|
||||
|
||||
/// Opening and immediately closing a character stream produces an empty CDATA section.
TEST_F(WriterTest, endCharStream)
{
    // Arrange
    _writer.beginCharStream();

    // Act
    _writer.endCharStream();

    // Assert
    const std::string written = _writer.getString();
    EXPECT_EQ("<![CDATA[]]>", written);
}
|
||||
|
||||
/// A second endCharStream() neither throws nor writes anything further.
TEST_F(WriterTest, endCharStreamTwice)
{
    // Arrange
    _writer.beginCharStream();
    _writer.endCharStream();

    // Act
    _writer.endCharStream();  // Doesn't throw, or do anything at all

    // Assert
    const std::string written = _writer.getString();
    EXPECT_EQ("<![CDATA[]]>", written);
}
|
||||
|
||||
/// charStream() returns the very stream object created by beginCharStream().
TEST_F(WriterTest, charStream)
{
    // Arrange
    auto& openedStream = _writer.beginCharStream();

    // Act
    auto& currentStream = _writer.charStream();

    // Assert
    EXPECT_EQ(&openedStream, &currentStream);
}
|
||||
Reference in New Issue
Block a user