From eb3b0b9d87d2e06180990c37c26733b0e47f6635 Mon Sep 17 00:00:00 2001 From: Kevin Martin Date: Mon, 31 Mar 2025 11:45:58 -0400 Subject: [PATCH] Base: UniqueNameManager support for very long numbers in name (#19943) * Add unit tests for large digit count in unique names * Updated to use arbitrary-precision unsigneds Passes the new unit tests, all diagnostics, and resolves Issue 19881 * Place UnlimitedUnsigned at top level and add unit tests * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- src/Base/CMakeLists.txt | 1 + src/Base/UniqueNameManager.cpp | 109 ++----------- src/Base/UniqueNameManager.h | 110 ++++++++++++- src/Base/UnlimitedUnsigned.h | 235 +++++++++++++++++++++++++++ tests/src/Base/CMakeLists.txt | 1 + tests/src/Base/UniqueNameManager.cpp | 16 ++ tests/src/Base/UnlimitedUnsigned.cpp | 55 +++++++ 7 files changed, 420 insertions(+), 107 deletions(-) create mode 100644 src/Base/UnlimitedUnsigned.h create mode 100644 tests/src/Base/UnlimitedUnsigned.cpp diff --git a/src/Base/CMakeLists.txt b/src/Base/CMakeLists.txt index 52f85bd30a..43ee4cc2bb 100644 --- a/src/Base/CMakeLists.txt +++ b/src/Base/CMakeLists.txt @@ -301,6 +301,7 @@ SET(FreeCADBase_HPP_SRCS Tools3D.h Translate.h Type.h + UnlimitedUnsigned.h UniqueNameManager.h Uuid.h Vector3D.h diff --git a/src/Base/UniqueNameManager.cpp b/src/Base/UniqueNameManager.cpp index 34a050606a..a7f580e0a7 100644 --- a/src/Base/UniqueNameManager.cpp +++ b/src/Base/UniqueNameManager.cpp @@ -30,99 +30,7 @@ #endif #include "UniqueNameManager.h" -void Base::UniqueNameManager::PiecewiseSparseIntegerSet::add(unsigned int value) -{ - SpanType newSpan(value, 1); - // Look for the smallest entry not less than newSpan. - // Bear in mind that overlapping spans are neither less than nor greater than ech other. - auto above = spans.lower_bound(newSpan); - if (above != spans.end() && above->first <= value) { - // A span was found that includes value so there is nothing to do as it is already in the - // set. - return; - } - - // Set below to the next span below 'after', if any, otherwise to spans.end(). - // Logically, we want spans.begin()-1 so 'below' is always the entry before 'after', - // but that is not an allowed reference so spans.end() is used - std::set::iterator below; - if (above == spans.begin()) { - // No spans are less than newSpan - // (possibly spans is empty, in which case above == spans.end() too) - below = spans.end(); - } - else { - // At least one span is less than newSpan, - // and 'above' is the next span above that - // (or above == spans.end() if all spans are below newSpan) - below = above; - --below; - } - - // Determine whether the span above (if any) and/or the span below (if any) - // are adjacent to newSpan and if so, merge them appropriately and remove the - // original span(s) that was/were merged, updating newSpan to be the new merged - // span. - if (above != spans.end() && below != spans.end() - && above->first - below->first + 1 == below->second) { - // below and above have a gap of exactly one between them, and this must be value - // so we coalesce the two spans (and the gap) into one. - newSpan = SpanType(below->first, below->second + above->second + 1); - spans.erase(above); - above = spans.erase(below); - } - else if (below != spans.end() && value - below->first == below->second) { - // value is adjacent to the end of below, so just expand below by one - newSpan = SpanType(below->first, below->second + 1); - above = spans.erase(below); - } - else if (above != spans.end() && above->first - value == 1) { - // value is adjacent to the start of above, so just expand above down by one - newSpan = SpanType(above->first - 1, above->second + 1); - above = spans.erase(above); - } - // else value is not adjacent to any existing span, so just make anew span for it - spans.insert(above, newSpan); -} -void Base::UniqueNameManager::PiecewiseSparseIntegerSet::remove(unsigned int value) -{ - SpanType newSpan(value, 1); - auto at = spans.lower_bound(newSpan); - if (at == spans.end() || at->first > value) { - // The found span does not include value so there is nothing to do, as it is already not in - // the set. - return; - } - if (at->second == 1) { - // value is the only in this span, just remove the span - spans.erase(at); - } - else if (at->first == value) { - // value is the first in this span, trim the lower end - SpanType replacement(at->first + 1, at->second - 1); - spans.insert(spans.erase(at), replacement); - } - else if (value - at->first == at->second - 1) { - // value is the last in this span, trim the upper end - SpanType replacement(at->first, at->second - 1); - spans.insert(spans.erase(at), replacement); - } - else { - // value is in the moddle of the span, so we must split it. - SpanType firstReplacement(at->first, value - at->first); - SpanType secondReplacement(value + 1, at->second - ((value + 1) - at->first)); - // Because erase returns the iterator after the erased element, and insert returns the - // iterator for the inserted item, we want to insert secondReplacement first. - spans.insert(spans.insert(spans.erase(at), secondReplacement), firstReplacement); - } -} -bool Base::UniqueNameManager::PiecewiseSparseIntegerSet::contains(unsigned int value) const -{ - auto at = spans.lower_bound(SpanType(value, 1)); - return at != spans.end() && at->first <= value; -} - -std::tuple +std::tuple Base::UniqueNameManager::decomposeName(const std::string& name) const { auto suffixStart = getNameSuffixStartPosition(name); @@ -130,11 +38,11 @@ Base::UniqueNameManager::decomposeName(const std::string& name) const return std::isdigit(c); }); unsigned int digitCount = digitsStart - suffixStart; - return std::tuple { + return std::tuple { name.substr(0, name.crend() - digitsStart), name.substr(name.crend() - suffixStart), digitCount, - digitCount == 0 ? 0U : std::stoul(name.substr(name.crend() - digitsStart, digitCount))}; + UnlimitedUnsigned::fromString(name.substr(name.crend() - digitsStart, digitCount))}; } bool Base::UniqueNameManager::haveSameBaseName(const std::string& first, const std::string& second) const @@ -162,13 +70,16 @@ void Base::UniqueNameManager::addExactName(const std::string& name) if (baseNameEntry == uniqueSeeds.end()) { // First use of baseName baseNameEntry = - uniqueSeeds.emplace(baseName, std::vector()).first; + uniqueSeeds + .emplace(baseName, std::vector>()) + .first; } if (digitCount >= baseNameEntry->second.size()) { // First use of this digitCount baseNameEntry->second.resize(digitCount + 1); } - PiecewiseSparseIntegerSet& baseNameAndDigitCountEntry = baseNameEntry->second[digitCount]; + PiecewiseSparseIntegerSet& baseNameAndDigitCountEntry = + baseNameEntry->second[digitCount]; if (baseNameAndDigitCountEntry.contains(digitsValue)) { // We already have at least one instance of the name. @@ -198,12 +109,12 @@ std::string Base::UniqueNameManager::makeUniqueName(const std::string& modelName // We start the longer digit string at 000...0001 even though we might have shorter strings // with larger numeric values. digitCount = minDigits; - digitsValue = 1; + digitsValue = UnlimitedUnsigned(1); } else { digitsValue = baseNameEntry->second[digitCount].next(); } - std::string digits = std::to_string(digitsValue); + std::string digits = digitsValue.toString(); if (digitCount > digits.size()) { namePrefix += std::string(digitCount - digits.size(), '0'); } diff --git a/src/Base/UniqueNameManager.h b/src/Base/UniqueNameManager.h index c91c9772f1..6797d57be8 100644 --- a/src/Base/UniqueNameManager.h +++ b/src/Base/UniqueNameManager.h @@ -31,6 +31,10 @@ #include #include #include // Forward declares std::tuple +#include +#include +#include +#include "UnlimitedUnsigned.h" // ---------------------------------------------------------------------------- @@ -58,6 +62,7 @@ protected: } private: + template class PiecewiseSparseIntegerSet { public: @@ -66,7 +71,7 @@ private: private: // Each pair being represents the span of integers from lowest to // (lowest+count-1) inclusive - using SpanType = std::pair; + using SpanType = std::pair; // This span Comparer class is analogous to std::less and treats overlapping spans as being // neither greater nor less than each other class Comparer @@ -81,11 +86,100 @@ private: // spans is the set of spans. Adjacent spans are coalesced so there are always gaps between // the entries. std::set spans; + using SpanSetIterator = typename std::set::iterator; public: - void add(unsigned int value); - void remove(unsigned int value); - bool contains(unsigned int value) const; + void add(IT value) + { + SpanType newSpan(value, 1); + // Look for the smallest entry not less than newSpan. + // Bear in mind that overlapping spans are neither less than nor greater than ech other. + auto above = spans.lower_bound(newSpan); + if (above != spans.end() && above->first <= value) { + // A span was found that includes value so there is nothing to do as it is already + // in the set. + return; + } + + // Set below to the next span below 'after', if any, otherwise to spans.end(). + // Logically, we want spans.begin()-1 so 'below' is always the entry before 'after', + // but that is not an allowed reference so spans.end() is used + SpanSetIterator below; + if (above == spans.begin()) { + // No spans are less than newSpan + // (possibly spans is empty, in which case above == spans.end() too) + below = spans.end(); + } + else { + // At least one span is less than newSpan, + // and 'above' is the next span above that + // (or above == spans.end() if all spans are below newSpan) + below = above; + --below; + } + + // Determine whether the span above (if any) and/or the span below (if any) + // are adjacent to newSpan and if so, merge them appropriately and remove the + // original span(s) that was/were merged, updating newSpan to be the new merged + // span. + if (above != spans.end() && below != spans.end() + && above->first - below->first + 1 == below->second) { + // below and above have a gap of exactly one between them, and this must be value + // so we coalesce the two spans (and the gap) into one. + newSpan = SpanType(below->first, below->second + above->second + 1); + spans.erase(above); + above = spans.erase(below); + } + else if (below != spans.end() && value - below->first == below->second) { + // value is adjacent to the end of below, so just expand below by one + newSpan = SpanType(below->first, below->second + 1); + above = spans.erase(below); + } + else if (above != spans.end() && above->first - value == 1) { + // value is adjacent to the start of above, so just expand above down by one + newSpan = SpanType(above->first - 1, above->second + 1); + above = spans.erase(above); + } + // else value is not adjacent to any existing span, so just make anew span for it + spans.insert(above, newSpan); + } + void remove(IT value) + { + SpanType newSpan(value, 1); + auto at = spans.lower_bound(newSpan); + if (at == spans.end() || at->first > value) { + // The found span does not include value so there is nothing to do, as it is already + // not in the set. + return; + } + if (at->second == 1) { + // value is the only in this span, just remove the span + spans.erase(at); + } + else if (at->first == value) { + // value is the first in this span, trim the lower end + SpanType replacement(at->first + 1, at->second - 1); + spans.insert(spans.erase(at), replacement); + } + else if (value - at->first == at->second - 1) { + // value is the last in this span, trim the upper end + SpanType replacement(at->first, at->second - 1); + spans.insert(spans.erase(at), replacement); + } + else { + // value is in the moddle of the span, so we must split it. + SpanType firstReplacement(at->first, value - at->first); + SpanType secondReplacement(value + 1, at->second - ((value + 1) - at->first)); + // Because erase returns the iterator after the erased element, and insert returns + // the iterator for the inserted item, we want to insert secondReplacement first. + spans.insert(spans.insert(spans.erase(at), secondReplacement), firstReplacement); + } + } + bool contains(IT value) const + { + auto at = spans.lower_bound(SpanType(value, IT(1))); + return at != spans.end() && at->first <= value; + } bool empty() const { return spans.empty(); @@ -94,10 +188,10 @@ private: { spans.clear(); } - unsigned int next() const + IT next() const { if (spans.empty()) { - return 0; + return IT(0); } auto last = spans.end(); --last; @@ -107,7 +201,7 @@ private: // Keyed as uniqueSeeds[baseName][digitCount][digitValue] iff that seed is taken. // We need the double-indexing so that Name01 and Name001 can both be indexed, although we only // ever allocate off the longest for each name i.e. uniqueSeeds[baseName].size()-1 digits. - std::map> uniqueSeeds; + std::map>> uniqueSeeds; // Counts of inserted strings that have duplicates, i.e. more than one instance in the // collection. This does not contain entries for singleton names. std::map duplicateCounts; @@ -116,7 +210,7 @@ private: /// @param name The name to break up /// @return a tuple(basePrefix, nameSuffix, uniqueDigitCount, uniqueDigitsValue); /// The two latter values will be (0,0) if name is a base name without uniquifying digits. - std::tuple + std::tuple decomposeName(const std::string& name) const; public: diff --git a/src/Base/UnlimitedUnsigned.h b/src/Base/UnlimitedUnsigned.h new file mode 100644 index 0000000000..36e6eaf1cf --- /dev/null +++ b/src/Base/UnlimitedUnsigned.h @@ -0,0 +1,235 @@ +/*************************************************************************** + * Copyright (c) 2025 Kevin Martin * + * * + * This file is part of FreeCAD. * + * * + * FreeCAD is free software: you can redistribute it and/or modify it * + * under the terms of the GNU Lesser General Public License as * + * published by the Free Software Foundation, either version 2.1 of the * + * License, or (at your option) any later version. * + * * + * FreeCAD is distributed in the hope that it will be useful, but * + * WITHOUT ANY WARRANTY; without even the implied warranty of * + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * + * Lesser General Public License for more details. * + * * + * You should have received a copy of the GNU Lesser General Public * + * License along with FreeCAD. If not, see * + * . * + * * + ***************************************************************************/ + + +#ifndef SRC_BASE_UNLIMITEDUNSIGNED_H_ +#define SRC_BASE_UNLIMITEDUNSIGNED_H_ + +#ifndef FC_GLOBAL_H +#include +#endif +#include +#include +#include + +// ---------------------------------------------------------------------------- + +namespace Base +{ +class UnlimitedUnsigned +{ +private: + // This should be a signed type so we can get transient negative values during calculations + // without resorting to a larger type + using PartType = int32_t; + // The following must be a type that limits the value to >= 0 and < maxPartPlusOne + using SmallDeltaType = unsigned char; + explicit UnlimitedUnsigned(std::vector&& partsVector) + : parts(std::move(partsVector)) + {} + // We take a really cheap floor(bitcount * log10(max PartType)). 3/10 is just a bit less + // than log10(2) The value should be sizeof(PartType) * CHAR_BIT * 3 / 10 but we can't + // calculate the corresponding maxPartPlusOne in a static const initializer, so we just wire + // in the values for 4-byte PartType. + static const size_t partDigitCount = 9; + static const PartType maxPartPlusOne = + 1000000000; // (PartType)pow(10, partDigitCount); but can't call pow in a const ctor. + +public: + explicit UnlimitedUnsigned(SmallDeltaType value) + : parts(std::vector(1, value)) + {} + // Parse a decimal digit string into an UnlimitedUnsigned. There should be no white space, + // only digits. a zero-length string parses as zero. + static UnlimitedUnsigned fromString(const std::string& text) + { + std::vector result((text.size() + partDigitCount - 1) / partDigitCount); + size_t minimumSize = 0; + size_t lastStartPosition = text.size(); + // Parse chunks of digits starting with the least significant + for (size_t i = 0; i < result.size(); i++) { + if (partDigitCount >= lastStartPosition) { + // partial chunk of leading digits + result[i] = static_cast(std::stoul(text.substr(0, lastStartPosition))); + } + else { + lastStartPosition -= partDigitCount; + result[i] = static_cast( + std::stoul(text.substr(lastStartPosition, partDigitCount))); + } + if (result[i] != 0) { + minimumSize = i + 1; + } + } + result.resize(minimumSize); + return UnlimitedUnsigned(std::move(result)); + } + std::string toString() + { + std::string result; + result.reserve(parts.size() * partDigitCount); + // Format the chunks from most- to least-significant + for (auto it = parts.rbegin(); it != parts.rend(); it++) { + std::string partDigits = std::to_string(*it); + if (!result.empty()) { + size_t padding = partDigitCount - partDigits.size(); + if (padding > 0) { + result += std::string(padding, '0'); + } + } + result += partDigits; + } + return result; + } + // TODO: Some allocation of vector contents could be avoided by defining methods like + // static UnlimitedUnsigned operator+(UnlimitedUnsigned&& left, const UnlimitedUnsigned& + // right); static UnlimitedUnsigned operator+(const UnlimitedUnsigned& left, + // UnlimitedUnsigned&& right); static UnlimitedUnsigned operator+(UnlimitedUnsigned&& left, + // UnlimitedUnsigned&& right); which would re-use left.parts or right.parts after possibly + // growing it. The last one would use the larger of the two buffers + UnlimitedUnsigned operator+(const UnlimitedUnsigned& right) const + { + size_t resultSize = std::max(parts.size(), right.parts.size()); + std::vector result(resultSize); + PartType carry = 0; + for (size_t i = 0; i < resultSize; i++) { + auto newPart = (i < parts.size() ? parts[i] : 0) + + (i < right.parts.size() ? right.parts[i] : 0) + carry; + if (newPart < maxPartPlusOne) { + carry = 0; + } + else { + carry = 1; + newPart -= maxPartPlusOne; + } + result[i] = newPart; + } + if (carry > 0) { + result.resize(resultSize + 1); + result[resultSize] = carry; + } + return UnlimitedUnsigned(std::move(result)); + } + UnlimitedUnsigned operator+(SmallDeltaType right) const + { + size_t resultSize = parts.size(); + std::vector result(resultSize); + PartType carry = right; + for (size_t i = 0; i < resultSize; i++) { + auto newPart = parts[i] + carry; + if (newPart < maxPartPlusOne) { + carry = 0; + } + else { + carry = 1; + newPart -= maxPartPlusOne; + } + result[i] = newPart; + } + if (carry > 0) { + result.resize(resultSize + 1); + result[resultSize] = carry; + } + return UnlimitedUnsigned(std::move(result)); + } + UnlimitedUnsigned operator-(const UnlimitedUnsigned& right) const + { + size_t resultSize = std::max(parts.size(), right.parts.size()); + std::vector result(resultSize); + PartType borrow = 0; + size_t lastNonZero = 0; + for (size_t i = 0; i < resultSize; i++) { + auto newPart = (i < parts.size() ? parts[i] : 0) + - (i < right.parts.size() ? right.parts[i] : 0) - borrow; + if (newPart >= 0) { + borrow = 0; + } + else { + borrow = 1; + newPart += maxPartPlusOne; + } + result[i] = newPart; + if (newPart != 0) { + lastNonZero = i + 1; + } + } + if (borrow > 0) { + throw std::overflow_error("UnlimitedUnsigned arithmetic produced a negative result"); + } + result.resize(lastNonZero); + return UnlimitedUnsigned(std::move(result)); + } + UnlimitedUnsigned operator-(SmallDeltaType right) const + { + size_t resultSize = parts.size(); + std::vector result(resultSize); + PartType borrow = right; + for (size_t i = 0; i < resultSize; i++) { + auto newPart = parts[i] - borrow; + if (newPart >= 0) { + borrow = 0; + } + else { + borrow = 1; + newPart += maxPartPlusOne; + } + result[i] = newPart; + } + if (borrow > 0) { + throw std::overflow_error("UnlimitedUnsigned arithmetic produced a negative result"); + } + // Note that by here resultSize > 0, otherwise the original this == 0 and we would have + // the above exception. + if (result[resultSize - 1] == 0) { + result.resize(resultSize - 1); + } + return UnlimitedUnsigned(std::move(result)); + } + bool operator<=(const UnlimitedUnsigned& right) const + { + // lexicographical_compare is logically a < operation, so we reverse the operands and + // invert the result to get <= and we compare the most significant chunks first. + return parts.size() < right.parts.size() + || (parts.size() == right.parts.size() + && !std::lexicographical_compare(right.parts.rbegin(), + right.parts.rend(), + parts.rbegin(), + parts.rend())); + } + bool operator>(const UnlimitedUnsigned& right) const + { + return !(*this <= right); + } + bool operator==(const UnlimitedUnsigned& right) const + { + return parts == right.parts; + } + bool operator==(SmallDeltaType right) const + { + return right == 0 ? parts.empty() : (parts.size() == 1 && parts[0] == right); + } + +private: + std::vector parts; +}; +} // namespace Base + +#endif // SRC_BASE_UNLIMITEDUNSIGNED_H_ diff --git a/tests/src/Base/CMakeLists.txt b/tests/src/Base/CMakeLists.txt index ac7c77c8fc..154831216f 100644 --- a/tests/src/Base/CMakeLists.txt +++ b/tests/src/Base/CMakeLists.txt @@ -21,6 +21,7 @@ target_sources(Tests_run PRIVATE Tools.cpp Tools2D.cpp Tools3D.cpp + UnlimitedUnsigned.cpp UniqueNameManager.cpp Unit.cpp Vector3D.cpp diff --git a/tests/src/Base/UniqueNameManager.cpp b/tests/src/Base/UniqueNameManager.cpp index a9ff22f3ff..ba502eafd2 100644 --- a/tests/src/Base/UniqueNameManager.cpp +++ b/tests/src/Base/UniqueNameManager.cpp @@ -106,4 +106,20 @@ TEST(UniqueNameManager, Issue18504) name = manager.makeUniqueName("Origin", 3); EXPECT_NE(name, "Origin010"); } + +TEST(UniqueNameManager, UniqueNameWithManyDigits) +{ + // Check that names with many digits (value larger than max unsigned long) work + Base::UniqueNameManager manager; + manager.addExactName("Compound006002002002002"); + EXPECT_EQ(manager.makeUniqueName("Compound", 3), "Compound006002002002003"); +} +TEST(UniqueNameManager, UniqueNameWith9NDigits) +{ + // Check that names with a multiple of 9 digits work. The manager chunks nine digits at a time + // so this boundary condition needs a test. + Base::UniqueNameManager manager; + manager.addExactName("Compound123456789"); + EXPECT_EQ(manager.makeUniqueName("Compound", 3), "Compound123456790"); +} // NOLINTEND(cppcoreguidelines-*,readability-*) diff --git a/tests/src/Base/UnlimitedUnsigned.cpp b/tests/src/Base/UnlimitedUnsigned.cpp new file mode 100644 index 0000000000..240c7ed141 --- /dev/null +++ b/tests/src/Base/UnlimitedUnsigned.cpp @@ -0,0 +1,55 @@ +/*************************************************************************** + * Copyright (c) 2025 Kevin Martin * + * * + * This file is part of FreeCAD. * + * * + * FreeCAD is free software: you can redistribute it and/or modify it * + * under the terms of the GNU Lesser General Public License as * + * published by the Free Software Foundation, either version 2.1 of the * + * License, or (at your option) any later version. * + * * + * FreeCAD is distributed in the hope that it will be useful, but * + * WITHOUT ANY WARRANTY; without even the implied warranty of * + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * + * Lesser General Public License for more details. * + * * + * You should have received a copy of the GNU Lesser General Public * + * License along with FreeCAD. If not, see * + * . * + * * + ***************************************************************************/ +#include +#include +#include + +// NOLINTBEGIN(cppcoreguidelines-*,readability-*) +TEST(UnlimitedUnsigned, Basics) +{ + // Check simple addition with carry and conversion from string + Base::UnlimitedUnsigned one(1); + auto nines = Base::UnlimitedUnsigned::fromString("999999999"); + EXPECT_EQ(nines + one, Base::UnlimitedUnsigned::fromString("1000000000")); +} +TEST(UnlimitedUnsigned, ToString) +{ + // Check toString on simple addition result + Base::UnlimitedUnsigned one(1); + auto nines = Base::UnlimitedUnsigned::fromString("999999999"); + EXPECT_EQ((nines + one).toString(), "1000000000"); +} +TEST(UnlimitedUnsigned, TestSubtraction1) +{ + // Check subtraction and comparison with byte-sized number + EXPECT_EQ(Base::UnlimitedUnsigned::fromString("6842357951") + - Base::UnlimitedUnsigned::fromString("6842357948"), + 3); +} +TEST(UnlimitedUnsigned, TestSubtraction2) +{ + // Check subtraction and comparison + EXPECT_EQ(Base::UnlimitedUnsigned::fromString("6842357951") + - Base::UnlimitedUnsigned::fromString("6000000000"), + Base::UnlimitedUnsigned::fromString("842357951")); +} + +// NOLINTEND(cppcoreguidelines-*,readability-*)