App/Toponaming: StringHasher tests, commenting, and cleanup
This commit is contained in:
committed by
Chris Hennes
parent
2ef98bfdc0
commit
ec98b5e498
@@ -180,18 +180,18 @@ StringHasher::~StringHasher()
|
||||
clear();
|
||||
}
|
||||
|
||||
StringHasher::StringHasher([[maybe_unused]] StringHasher &&other) noexcept
|
||||
StringHasher::StringHasher([[maybe_unused]] StringHasher&& other) noexcept
|
||||
{
|
||||
// Private: unimplemented
|
||||
}
|
||||
|
||||
StringHasher& StringHasher::operator=([[maybe_unused]] StringHasher &other)
|
||||
StringHasher& StringHasher::operator=([[maybe_unused]] StringHasher& other)
|
||||
{
|
||||
// Private: unimplemented
|
||||
return *this;
|
||||
}
|
||||
|
||||
StringHasher& StringHasher::operator=([[maybe_unused]] StringHasher &&other) noexcept
|
||||
StringHasher& StringHasher::operator=([[maybe_unused]] StringHasher&& other) noexcept
|
||||
{
|
||||
// Private: unimplemented
|
||||
return *this;
|
||||
@@ -317,22 +317,28 @@ StringIDRef StringHasher::getID(const QByteArray& data, Options options)
|
||||
|
||||
StringIDRef StringHasher::getID(const Data::MappedName& name, const QVector<StringIDRef>& sids)
|
||||
{
|
||||
StringID anID;
|
||||
anID._postfix = name.postfixBytes();
|
||||
StringID tempID;
|
||||
tempID._postfix = name.postfixBytes();
|
||||
|
||||
Data::IndexedName indexed;
|
||||
if (anID._postfix.size() == 0) {
|
||||
if (tempID._postfix.size() == 0) {
|
||||
// Restrict this optimization to only cases with no postfix because it is causing some
|
||||
// problems during recomputes. TODO: This needs to be investigated further
|
||||
indexed = Data::IndexedName(name.dataBytes());
|
||||
}
|
||||
if (indexed) {
|
||||
anID._data =
|
||||
// If this is an IndexedName, then _data only stores the base part of the name, without the
|
||||
// integer index
|
||||
tempID._data =
|
||||
QByteArray::fromRawData(indexed.getType(), static_cast<int>(strlen(indexed.getType())));
|
||||
}
|
||||
else {
|
||||
anID._data = name.dataBytes();
|
||||
// Store the entire name in _data, but temporarily re-use the existing memory
|
||||
tempID._data = name.dataBytes();
|
||||
}
|
||||
|
||||
auto it = _hashes->left.find(&anID);
|
||||
// Check to see if there is already an entry in the hash table for this StringID
|
||||
auto it = _hashes->left.find(&tempID);
|
||||
if (it != _hashes->left.end()) {
|
||||
auto res = StringIDRef(it->first);
|
||||
if (indexed) {
|
||||
@@ -342,40 +348,47 @@ StringIDRef StringHasher::getID(const Data::MappedName& name, const QVector<Stri
|
||||
}
|
||||
|
||||
if (!indexed && name.isRaw()) {
|
||||
anID._data = QByteArray(name.dataBytes().constData(), name.dataBytes().size());
|
||||
// Make a copy of the memory if we didn't do so earlier
|
||||
tempID._data = QByteArray(name.dataBytes().constData(), name.dataBytes().size());
|
||||
}
|
||||
|
||||
// If the postfix is not already encoded, use getID to encode it:
|
||||
StringIDRef postfixRef;
|
||||
if ((anID._postfix.size() != 0) && anID._postfix.indexOf("#") < 0) {
|
||||
postfixRef = getID(anID._postfix);
|
||||
postfixRef.toBytes(anID._postfix);
|
||||
if ((tempID._postfix.size() != 0) && tempID._postfix.indexOf("#") < 0) {
|
||||
postfixRef = getID(tempID._postfix);
|
||||
postfixRef.toBytes(tempID._postfix);
|
||||
}
|
||||
|
||||
// If _data is an IndexedName, use getID to encode it:
|
||||
StringIDRef indexRef;
|
||||
if (indexed) {
|
||||
indexRef = getID(anID._data);
|
||||
indexRef = getID(tempID._data);
|
||||
}
|
||||
|
||||
StringIDRef sid(new StringID(lastID() + 1, anID._data));
|
||||
StringID& newStringID = *sid._sid;
|
||||
if (anID._postfix.size() != 0) {
|
||||
// The real StringID object that we are going to insert
|
||||
StringIDRef newStringIDRef(new StringID(lastID() + 1, tempID._data));
|
||||
StringID& newStringID = *newStringIDRef._sid;
|
||||
if (tempID._postfix.size() != 0) {
|
||||
newStringID._flags.setFlag(StringID::Flag::Postfixed);
|
||||
newStringID._postfix = anID._postfix;
|
||||
newStringID._postfix = tempID._postfix;
|
||||
}
|
||||
|
||||
int count = 0;
|
||||
for (const auto& hasher : sids) {
|
||||
if (hasher && hasher._sid->_hasher == this) {
|
||||
++count;
|
||||
// Count the related SIDs that use this hasher
|
||||
int numSIDs = 0;
|
||||
for (const auto& relatedID : sids) {
|
||||
if (relatedID && relatedID._sid->_hasher == this) {
|
||||
++numSIDs;
|
||||
}
|
||||
}
|
||||
|
||||
int extra = (postfixRef ? 1 : 0) + (indexRef ? 1 : 0);
|
||||
if (count == sids.size() && !postfixRef && !indexRef) {
|
||||
int numAddedSIDs = (postfixRef ? 1 : 0) + (indexRef ? 1 : 0);
|
||||
if (numSIDs == sids.size() && !postfixRef && !indexRef) {
|
||||
// The simplest case: just copy the whole list
|
||||
newStringID._sids = sids;
|
||||
}
|
||||
else {
|
||||
newStringID._sids.reserve(count + extra);
|
||||
// Put the added SIDs at the front of the SID list
|
||||
newStringID._sids.reserve(numSIDs + numAddedSIDs);
|
||||
if (postfixRef) {
|
||||
newStringID._flags.setFlag(StringID::Flag::PostfixEncoded);
|
||||
newStringID._sids.push_back(postfixRef);
|
||||
@@ -384,15 +397,21 @@ StringIDRef StringHasher::getID(const Data::MappedName& name, const QVector<Stri
|
||||
newStringID._flags.setFlag(StringID::Flag::Indexed);
|
||||
newStringID._sids.push_back(indexRef);
|
||||
}
|
||||
for (const auto& hasher : sids) {
|
||||
if (hasher && hasher._sid->_hasher == this) {
|
||||
newStringID._sids.push_back(hasher);
|
||||
// Append the sids from the input list whose hasher is this one
|
||||
for (const auto& relatedID : sids) {
|
||||
if (relatedID && relatedID._sid->_hasher == this) {
|
||||
newStringID._sids.push_back(relatedID);
|
||||
}
|
||||
}
|
||||
}
|
||||
if (newStringID._sids.size() > 10) {
|
||||
std::sort(newStringID._sids.begin() + extra, newStringID._sids.end());
|
||||
newStringID._sids.erase(std::unique(newStringID._sids.begin() + extra, newStringID._sids.end()),
|
||||
|
||||
// If the number of related IDs is larger than some threshold (hardcoded to 10 right now), then
|
||||
// remove any duplicates (ignoring the new SIDs we may have just added)
|
||||
const int relatedIDSizeThreshold {10};
|
||||
if (newStringID._sids.size() > relatedIDSizeThreshold) {
|
||||
std::sort(newStringID._sids.begin() + numAddedSIDs, newStringID._sids.end());
|
||||
newStringID._sids.erase(
|
||||
std::unique(newStringID._sids.begin() + numAddedSIDs, newStringID._sids.end()),
|
||||
newStringID._sids.end());
|
||||
}
|
||||
|
||||
@@ -401,14 +420,14 @@ StringIDRef StringHasher::getID(const Data::MappedName& name, const QVector<Stri
|
||||
if ((newStringID._postfix.size() != 0) && !indexed) {
|
||||
// Use the fromString function to parse the new StringID's data field for a possible index
|
||||
StringID::IndexID res = StringID::fromString(newStringID._data);
|
||||
if (res.id > 0) { // If the data had an index
|
||||
if (res.id > 0) {// If the data had an index
|
||||
int offset = newStringID.isPostfixEncoded() ? 1 : 0;
|
||||
// Search for the SID with that index
|
||||
for (int i = offset; i < newStringID._sids.size(); ++i) {
|
||||
if (newStringID._sids[i].value() == res.id) {
|
||||
if (i != offset) {
|
||||
// If this SID is not already the first element in sids, move it there by
|
||||
// swapping it with
|
||||
// swapping it with whatever WAS there
|
||||
std::swap(newStringID._sids[offset], newStringID._sids[i]);
|
||||
}
|
||||
if (res.index != 0) {
|
||||
@@ -423,7 +442,7 @@ StringIDRef StringHasher::getID(const Data::MappedName& name, const QVector<Stri
|
||||
}
|
||||
}
|
||||
|
||||
return {insert(sid), indexed.getIndex()};
|
||||
return {insert(newStringIDRef), indexed.getIndex()};
|
||||
}
|
||||
|
||||
StringIDRef StringHasher::getID(long id, int index) const
|
||||
|
||||
@@ -74,7 +74,7 @@ using StringHasherRef = Base::Reference<StringHasher>;
|
||||
*/
|
||||
class AppExport StringID: public Base::BaseClass, public Base::Handled
|
||||
{
|
||||
TYPESYSTEM_HEADER_WITH_OVERRIDE();
|
||||
TYPESYSTEM_HEADER_WITH_OVERRIDE();// NOLINT
|
||||
|
||||
public:
|
||||
/// Flag of the stored string data
|
||||
@@ -316,10 +316,10 @@ private:
|
||||
StringID([[maybe_unused]] StringID&& other) noexcept
|
||||
: _id(0),
|
||||
_flags(StringID::Flag::None) {};
|
||||
StringID& operator=([[maybe_unused]] const StringID& rhs)
|
||||
StringID& operator=([[maybe_unused]] const StringID& rhs)// NOLINT
|
||||
{
|
||||
return *this;
|
||||
};// NOLINT
|
||||
};
|
||||
StringID& operator=([[maybe_unused]] StringID&& rhs) noexcept
|
||||
{
|
||||
return *this;
|
||||
@@ -590,12 +590,12 @@ public:
|
||||
|
||||
bool isMarked() const
|
||||
{
|
||||
return _sid && _sid->isMarked();
|
||||
return _sid && _sid->isMarked();// NOLINT
|
||||
}
|
||||
|
||||
bool isFromSameHasher(const StringHasherRef& hasher) const
|
||||
{
|
||||
return _sid && _sid->isFromSameHasher(hasher);
|
||||
return _sid && _sid->isFromSameHasher(hasher);// NOLINT
|
||||
}
|
||||
|
||||
StringHasherRef getHasher() const
|
||||
@@ -635,13 +635,13 @@ private:
|
||||
/// count. If a duplicate string is added, no additional copy is made, and a new reference to the
|
||||
/// original storage is returned (incrementing the reference counter of the instance).
|
||||
///
|
||||
/// If the string is longer than a given threshold, instead of storing the string, instead its
|
||||
/// SHA1 hash is stored (and the original string discarded). This allows an upper threshold on the
|
||||
/// length of a stored string, while still effectively guaranteeing uniqueness in the table.
|
||||
/// If the string is longer than a given threshold, instead of storing the string, its SHA1 hash is
|
||||
/// stored (and the original string discarded). This allows an upper threshold on the length of a
|
||||
/// stored string, while still effectively guaranteeing uniqueness in the table.
|
||||
class AppExport StringHasher: public Base::Persistence, public Base::Handled
|
||||
{
|
||||
|
||||
TYPESYSTEM_HEADER_WITH_OVERRIDE();
|
||||
TYPESYSTEM_HEADER_WITH_OVERRIDE();// NOLINT
|
||||
|
||||
public:
|
||||
StringHasher();
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user