Base: UniqueNameManager support for very long numbers in name (#19943)

* Add unit tests for large digit count in unique names

* Updated to use arbitrary-precision unsigneds

Passes the new unit tests, all diagnostics, and resolves Issue 19881

* Place UnlimitedUnsigned at top level and add unit tests

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
This commit is contained in:
Kevin Martin
2025-03-31 11:45:58 -04:00
committed by GitHub
parent 4ca7d1a297
commit eb3b0b9d87
7 changed files with 420 additions and 107 deletions

View File

@@ -301,6 +301,7 @@ SET(FreeCADBase_HPP_SRCS
Tools3D.h
Translate.h
Type.h
UnlimitedUnsigned.h
UniqueNameManager.h
Uuid.h
Vector3D.h

View File

@@ -30,99 +30,7 @@
#endif
#include "UniqueNameManager.h"
void Base::UniqueNameManager::PiecewiseSparseIntegerSet::add(unsigned int value)
{
SpanType newSpan(value, 1);
// Look for the smallest entry not less than newSpan.
// Bear in mind that overlapping spans are neither less than nor greater than ech other.
auto above = spans.lower_bound(newSpan);
if (above != spans.end() && above->first <= value) {
// A span was found that includes value so there is nothing to do as it is already in the
// set.
return;
}
// Set below to the next span below 'after', if any, otherwise to spans.end().
// Logically, we want spans.begin()-1 so 'below' is always the entry before 'after',
// but that is not an allowed reference so spans.end() is used
std::set<SpanType, Comparer>::iterator below;
if (above == spans.begin()) {
// No spans are less than newSpan
// (possibly spans is empty, in which case above == spans.end() too)
below = spans.end();
}
else {
// At least one span is less than newSpan,
// and 'above' is the next span above that
// (or above == spans.end() if all spans are below newSpan)
below = above;
--below;
}
// Determine whether the span above (if any) and/or the span below (if any)
// are adjacent to newSpan and if so, merge them appropriately and remove the
// original span(s) that was/were merged, updating newSpan to be the new merged
// span.
if (above != spans.end() && below != spans.end()
&& above->first - below->first + 1 == below->second) {
// below and above have a gap of exactly one between them, and this must be value
// so we coalesce the two spans (and the gap) into one.
newSpan = SpanType(below->first, below->second + above->second + 1);
spans.erase(above);
above = spans.erase(below);
}
else if (below != spans.end() && value - below->first == below->second) {
// value is adjacent to the end of below, so just expand below by one
newSpan = SpanType(below->first, below->second + 1);
above = spans.erase(below);
}
else if (above != spans.end() && above->first - value == 1) {
// value is adjacent to the start of above, so just expand above down by one
newSpan = SpanType(above->first - 1, above->second + 1);
above = spans.erase(above);
}
// else value is not adjacent to any existing span, so just make anew span for it
spans.insert(above, newSpan);
}
void Base::UniqueNameManager::PiecewiseSparseIntegerSet::remove(unsigned int value)
{
SpanType newSpan(value, 1);
auto at = spans.lower_bound(newSpan);
if (at == spans.end() || at->first > value) {
// The found span does not include value so there is nothing to do, as it is already not in
// the set.
return;
}
if (at->second == 1) {
// value is the only in this span, just remove the span
spans.erase(at);
}
else if (at->first == value) {
// value is the first in this span, trim the lower end
SpanType replacement(at->first + 1, at->second - 1);
spans.insert(spans.erase(at), replacement);
}
else if (value - at->first == at->second - 1) {
// value is the last in this span, trim the upper end
SpanType replacement(at->first, at->second - 1);
spans.insert(spans.erase(at), replacement);
}
else {
// value is in the moddle of the span, so we must split it.
SpanType firstReplacement(at->first, value - at->first);
SpanType secondReplacement(value + 1, at->second - ((value + 1) - at->first));
// Because erase returns the iterator after the erased element, and insert returns the
// iterator for the inserted item, we want to insert secondReplacement first.
spans.insert(spans.insert(spans.erase(at), secondReplacement), firstReplacement);
}
}
bool Base::UniqueNameManager::PiecewiseSparseIntegerSet::contains(unsigned int value) const
{
auto at = spans.lower_bound(SpanType(value, 1));
return at != spans.end() && at->first <= value;
}
std::tuple<std::string, std::string, unsigned int, unsigned int>
std::tuple<std::string, std::string, unsigned int, Base::UnlimitedUnsigned>
Base::UniqueNameManager::decomposeName(const std::string& name) const
{
auto suffixStart = getNameSuffixStartPosition(name);
@@ -130,11 +38,11 @@ Base::UniqueNameManager::decomposeName(const std::string& name) const
return std::isdigit(c);
});
unsigned int digitCount = digitsStart - suffixStart;
return std::tuple<std::string, std::string, unsigned int, unsigned int> {
return std::tuple<std::string, std::string, unsigned int, UnlimitedUnsigned> {
name.substr(0, name.crend() - digitsStart),
name.substr(name.crend() - suffixStart),
digitCount,
digitCount == 0 ? 0U : std::stoul(name.substr(name.crend() - digitsStart, digitCount))};
UnlimitedUnsigned::fromString(name.substr(name.crend() - digitsStart, digitCount))};
}
bool Base::UniqueNameManager::haveSameBaseName(const std::string& first,
const std::string& second) const
@@ -162,13 +70,16 @@ void Base::UniqueNameManager::addExactName(const std::string& name)
if (baseNameEntry == uniqueSeeds.end()) {
// First use of baseName
baseNameEntry =
uniqueSeeds.emplace(baseName, std::vector<PiecewiseSparseIntegerSet>()).first;
uniqueSeeds
.emplace(baseName, std::vector<PiecewiseSparseIntegerSet<UnlimitedUnsigned>>())
.first;
}
if (digitCount >= baseNameEntry->second.size()) {
// First use of this digitCount
baseNameEntry->second.resize(digitCount + 1);
}
PiecewiseSparseIntegerSet& baseNameAndDigitCountEntry = baseNameEntry->second[digitCount];
PiecewiseSparseIntegerSet<UnlimitedUnsigned>& baseNameAndDigitCountEntry =
baseNameEntry->second[digitCount];
if (baseNameAndDigitCountEntry.contains(digitsValue)) {
// We already have at least one instance of the name.
@@ -198,12 +109,12 @@ std::string Base::UniqueNameManager::makeUniqueName(const std::string& modelName
// We start the longer digit string at 000...0001 even though we might have shorter strings
// with larger numeric values.
digitCount = minDigits;
digitsValue = 1;
digitsValue = UnlimitedUnsigned(1);
}
else {
digitsValue = baseNameEntry->second[digitCount].next();
}
std::string digits = std::to_string(digitsValue);
std::string digits = digitsValue.toString();
if (digitCount > digits.size()) {
namePrefix += std::string(digitCount - digits.size(), '0');
}

View File

@@ -31,6 +31,10 @@
#include <set>
#include <map>
#include <utility> // Forward declares std::tuple
#include <stdexcept>
#include <algorithm>
#include <tuple>
#include "UnlimitedUnsigned.h"
// ----------------------------------------------------------------------------
@@ -58,6 +62,7 @@ protected:
}
private:
template<typename IT>
class PiecewiseSparseIntegerSet
{
public:
@@ -66,7 +71,7 @@ private:
private:
// Each pair being <lowest, count> represents the span of integers from lowest to
// (lowest+count-1) inclusive
using SpanType = std::pair<unsigned int, unsigned int>;
using SpanType = std::pair<IT, IT>;
// This span Comparer class is analogous to std::less and treats overlapping spans as being
// neither greater nor less than each other
class Comparer
@@ -81,11 +86,100 @@ private:
// spans is the set of spans. Adjacent spans are coalesced so there are always gaps between
// the entries.
std::set<SpanType, Comparer> spans;
using SpanSetIterator = typename std::set<SpanType, Comparer>::iterator;
public:
void add(unsigned int value);
void remove(unsigned int value);
bool contains(unsigned int value) const;
void add(IT value)
{
SpanType newSpan(value, 1);
// Look for the smallest entry not less than newSpan.
// Bear in mind that overlapping spans are neither less than nor greater than ech other.
auto above = spans.lower_bound(newSpan);
if (above != spans.end() && above->first <= value) {
// A span was found that includes value so there is nothing to do as it is already
// in the set.
return;
}
// Set below to the next span below 'after', if any, otherwise to spans.end().
// Logically, we want spans.begin()-1 so 'below' is always the entry before 'after',
// but that is not an allowed reference so spans.end() is used
SpanSetIterator below;
if (above == spans.begin()) {
// No spans are less than newSpan
// (possibly spans is empty, in which case above == spans.end() too)
below = spans.end();
}
else {
// At least one span is less than newSpan,
// and 'above' is the next span above that
// (or above == spans.end() if all spans are below newSpan)
below = above;
--below;
}
// Determine whether the span above (if any) and/or the span below (if any)
// are adjacent to newSpan and if so, merge them appropriately and remove the
// original span(s) that was/were merged, updating newSpan to be the new merged
// span.
if (above != spans.end() && below != spans.end()
&& above->first - below->first + 1 == below->second) {
// below and above have a gap of exactly one between them, and this must be value
// so we coalesce the two spans (and the gap) into one.
newSpan = SpanType(below->first, below->second + above->second + 1);
spans.erase(above);
above = spans.erase(below);
}
else if (below != spans.end() && value - below->first == below->second) {
// value is adjacent to the end of below, so just expand below by one
newSpan = SpanType(below->first, below->second + 1);
above = spans.erase(below);
}
else if (above != spans.end() && above->first - value == 1) {
// value is adjacent to the start of above, so just expand above down by one
newSpan = SpanType(above->first - 1, above->second + 1);
above = spans.erase(above);
}
// else value is not adjacent to any existing span, so just make anew span for it
spans.insert(above, newSpan);
}
void remove(IT value)
{
SpanType newSpan(value, 1);
auto at = spans.lower_bound(newSpan);
if (at == spans.end() || at->first > value) {
// The found span does not include value so there is nothing to do, as it is already
// not in the set.
return;
}
if (at->second == 1) {
// value is the only in this span, just remove the span
spans.erase(at);
}
else if (at->first == value) {
// value is the first in this span, trim the lower end
SpanType replacement(at->first + 1, at->second - 1);
spans.insert(spans.erase(at), replacement);
}
else if (value - at->first == at->second - 1) {
// value is the last in this span, trim the upper end
SpanType replacement(at->first, at->second - 1);
spans.insert(spans.erase(at), replacement);
}
else {
// value is in the moddle of the span, so we must split it.
SpanType firstReplacement(at->first, value - at->first);
SpanType secondReplacement(value + 1, at->second - ((value + 1) - at->first));
// Because erase returns the iterator after the erased element, and insert returns
// the iterator for the inserted item, we want to insert secondReplacement first.
spans.insert(spans.insert(spans.erase(at), secondReplacement), firstReplacement);
}
}
bool contains(IT value) const
{
auto at = spans.lower_bound(SpanType(value, IT(1)));
return at != spans.end() && at->first <= value;
}
bool empty() const
{
return spans.empty();
@@ -94,10 +188,10 @@ private:
{
spans.clear();
}
unsigned int next() const
IT next() const
{
if (spans.empty()) {
return 0;
return IT(0);
}
auto last = spans.end();
--last;
@@ -107,7 +201,7 @@ private:
// Keyed as uniqueSeeds[baseName][digitCount][digitValue] iff that seed is taken.
// We need the double-indexing so that Name01 and Name001 can both be indexed, although we only
// ever allocate off the longest for each name i.e. uniqueSeeds[baseName].size()-1 digits.
std::map<std::string, std::vector<PiecewiseSparseIntegerSet>> uniqueSeeds;
std::map<std::string, std::vector<PiecewiseSparseIntegerSet<UnlimitedUnsigned>>> uniqueSeeds;
// Counts of inserted strings that have duplicates, i.e. more than one instance in the
// collection. This does not contain entries for singleton names.
std::map<std::string, unsigned int> duplicateCounts;
@@ -116,7 +210,7 @@ private:
/// @param name The name to break up
/// @return a tuple(basePrefix, nameSuffix, uniqueDigitCount, uniqueDigitsValue);
/// The two latter values will be (0,0) if name is a base name without uniquifying digits.
std::tuple<std::string, std::string, unsigned int, unsigned int>
std::tuple<std::string, std::string, unsigned int, UnlimitedUnsigned>
decomposeName(const std::string& name) const;
public:

View File

@@ -0,0 +1,235 @@
/***************************************************************************
* Copyright (c) 2025 Kevin Martin <kpmartin@papertrail.ca> *
* *
* This file is part of FreeCAD. *
* *
* FreeCAD is free software: you can redistribute it and/or modify it *
* under the terms of the GNU Lesser General Public License as *
* published by the Free Software Foundation, either version 2.1 of the *
* License, or (at your option) any later version. *
* *
* FreeCAD is distributed in the hope that it will be useful, but *
* WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU *
* Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public *
* License along with FreeCAD. If not, see *
* <https://www.gnu.org/licenses/>. *
* *
***************************************************************************/
#ifndef SRC_BASE_UNLIMITEDUNSIGNED_H_
#define SRC_BASE_UNLIMITEDUNSIGNED_H_
#ifndef FC_GLOBAL_H
#include <FCGlobal.h>
#endif
#include <vector>
#include <string>
#include <stdexcept>
// ----------------------------------------------------------------------------
namespace Base
{
class UnlimitedUnsigned
{
private:
// This should be a signed type so we can get transient negative values during calculations
// without resorting to a larger type
using PartType = int32_t;
// The following must be a type that limits the value to >= 0 and < maxPartPlusOne
using SmallDeltaType = unsigned char;
explicit UnlimitedUnsigned(std::vector<PartType>&& partsVector)
: parts(std::move(partsVector))
{}
// We take a really cheap floor(bitcount * log10(max PartType)). 3/10 is just a bit less
// than log10(2) The value should be sizeof(PartType) * CHAR_BIT * 3 / 10 but we can't
// calculate the corresponding maxPartPlusOne in a static const initializer, so we just wire
// in the values for 4-byte PartType.
static const size_t partDigitCount = 9;
static const PartType maxPartPlusOne =
1000000000; // (PartType)pow(10, partDigitCount); but can't call pow in a const ctor.
public:
explicit UnlimitedUnsigned(SmallDeltaType value)
: parts(std::vector<PartType>(1, value))
{}
// Parse a decimal digit string into an UnlimitedUnsigned. There should be no white space,
// only digits. a zero-length string parses as zero.
static UnlimitedUnsigned fromString(const std::string& text)
{
std::vector<PartType> result((text.size() + partDigitCount - 1) / partDigitCount);
size_t minimumSize = 0;
size_t lastStartPosition = text.size();
// Parse chunks of digits starting with the least significant
for (size_t i = 0; i < result.size(); i++) {
if (partDigitCount >= lastStartPosition) {
// partial chunk of leading digits
result[i] = static_cast<PartType>(std::stoul(text.substr(0, lastStartPosition)));
}
else {
lastStartPosition -= partDigitCount;
result[i] = static_cast<PartType>(
std::stoul(text.substr(lastStartPosition, partDigitCount)));
}
if (result[i] != 0) {
minimumSize = i + 1;
}
}
result.resize(minimumSize);
return UnlimitedUnsigned(std::move(result));
}
std::string toString()
{
std::string result;
result.reserve(parts.size() * partDigitCount);
// Format the chunks from most- to least-significant
for (auto it = parts.rbegin(); it != parts.rend(); it++) {
std::string partDigits = std::to_string(*it);
if (!result.empty()) {
size_t padding = partDigitCount - partDigits.size();
if (padding > 0) {
result += std::string(padding, '0');
}
}
result += partDigits;
}
return result;
}
// TODO: Some allocation of vector contents could be avoided by defining methods like
// static UnlimitedUnsigned operator+(UnlimitedUnsigned&& left, const UnlimitedUnsigned&
// right); static UnlimitedUnsigned operator+(const UnlimitedUnsigned& left,
// UnlimitedUnsigned&& right); static UnlimitedUnsigned operator+(UnlimitedUnsigned&& left,
// UnlimitedUnsigned&& right); which would re-use left.parts or right.parts after possibly
// growing it. The last one would use the larger of the two buffers
UnlimitedUnsigned operator+(const UnlimitedUnsigned& right) const
{
size_t resultSize = std::max(parts.size(), right.parts.size());
std::vector<PartType> result(resultSize);
PartType carry = 0;
for (size_t i = 0; i < resultSize; i++) {
auto newPart = (i < parts.size() ? parts[i] : 0)
+ (i < right.parts.size() ? right.parts[i] : 0) + carry;
if (newPart < maxPartPlusOne) {
carry = 0;
}
else {
carry = 1;
newPart -= maxPartPlusOne;
}
result[i] = newPart;
}
if (carry > 0) {
result.resize(resultSize + 1);
result[resultSize] = carry;
}
return UnlimitedUnsigned(std::move(result));
}
UnlimitedUnsigned operator+(SmallDeltaType right) const
{
size_t resultSize = parts.size();
std::vector<PartType> result(resultSize);
PartType carry = right;
for (size_t i = 0; i < resultSize; i++) {
auto newPart = parts[i] + carry;
if (newPart < maxPartPlusOne) {
carry = 0;
}
else {
carry = 1;
newPart -= maxPartPlusOne;
}
result[i] = newPart;
}
if (carry > 0) {
result.resize(resultSize + 1);
result[resultSize] = carry;
}
return UnlimitedUnsigned(std::move(result));
}
UnlimitedUnsigned operator-(const UnlimitedUnsigned& right) const
{
size_t resultSize = std::max(parts.size(), right.parts.size());
std::vector<PartType> result(resultSize);
PartType borrow = 0;
size_t lastNonZero = 0;
for (size_t i = 0; i < resultSize; i++) {
auto newPart = (i < parts.size() ? parts[i] : 0)
- (i < right.parts.size() ? right.parts[i] : 0) - borrow;
if (newPart >= 0) {
borrow = 0;
}
else {
borrow = 1;
newPart += maxPartPlusOne;
}
result[i] = newPart;
if (newPart != 0) {
lastNonZero = i + 1;
}
}
if (borrow > 0) {
throw std::overflow_error("UnlimitedUnsigned arithmetic produced a negative result");
}
result.resize(lastNonZero);
return UnlimitedUnsigned(std::move(result));
}
UnlimitedUnsigned operator-(SmallDeltaType right) const
{
size_t resultSize = parts.size();
std::vector<PartType> result(resultSize);
PartType borrow = right;
for (size_t i = 0; i < resultSize; i++) {
auto newPart = parts[i] - borrow;
if (newPart >= 0) {
borrow = 0;
}
else {
borrow = 1;
newPart += maxPartPlusOne;
}
result[i] = newPart;
}
if (borrow > 0) {
throw std::overflow_error("UnlimitedUnsigned arithmetic produced a negative result");
}
// Note that by here resultSize > 0, otherwise the original this == 0 and we would have
// the above exception.
if (result[resultSize - 1] == 0) {
result.resize(resultSize - 1);
}
return UnlimitedUnsigned(std::move(result));
}
bool operator<=(const UnlimitedUnsigned& right) const
{
// lexicographical_compare is logically a < operation, so we reverse the operands and
// invert the result to get <= and we compare the most significant chunks first.
return parts.size() < right.parts.size()
|| (parts.size() == right.parts.size()
&& !std::lexicographical_compare(right.parts.rbegin(),
right.parts.rend(),
parts.rbegin(),
parts.rend()));
}
bool operator>(const UnlimitedUnsigned& right) const
{
return !(*this <= right);
}
bool operator==(const UnlimitedUnsigned& right) const
{
return parts == right.parts;
}
bool operator==(SmallDeltaType right) const
{
return right == 0 ? parts.empty() : (parts.size() == 1 && parts[0] == right);
}
private:
std::vector<PartType> parts;
};
} // namespace Base
#endif // SRC_BASE_UNLIMITEDUNSIGNED_H_

View File

@@ -21,6 +21,7 @@ target_sources(Tests_run PRIVATE
Tools.cpp
Tools2D.cpp
Tools3D.cpp
UnlimitedUnsigned.cpp
UniqueNameManager.cpp
Unit.cpp
Vector3D.cpp

View File

@@ -106,4 +106,20 @@ TEST(UniqueNameManager, Issue18504)
name = manager.makeUniqueName("Origin", 3);
EXPECT_NE(name, "Origin010");
}
TEST(UniqueNameManager, UniqueNameWithManyDigits)
{
// Check that names with many digits (value larger than max unsigned long) work
Base::UniqueNameManager manager;
manager.addExactName("Compound006002002002002");
EXPECT_EQ(manager.makeUniqueName("Compound", 3), "Compound006002002002003");
}
TEST(UniqueNameManager, UniqueNameWith9NDigits)
{
// Check that names with a multiple of 9 digits work. The manager chunks nine digits at a time
// so this boundary condition needs a test.
Base::UniqueNameManager manager;
manager.addExactName("Compound123456789");
EXPECT_EQ(manager.makeUniqueName("Compound", 3), "Compound123456790");
}
// NOLINTEND(cppcoreguidelines-*,readability-*)

View File

@@ -0,0 +1,55 @@
/***************************************************************************
* Copyright (c) 2025 Kevin Martin <kpmartin@papertrail.ca> *
* *
* This file is part of FreeCAD. *
* *
* FreeCAD is free software: you can redistribute it and/or modify it *
* under the terms of the GNU Lesser General Public License as *
* published by the Free Software Foundation, either version 2.1 of the *
* License, or (at your option) any later version. *
* *
* FreeCAD is distributed in the hope that it will be useful, but *
* WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU *
* Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public *
* License along with FreeCAD. If not, see *
* <https://www.gnu.org/licenses/>. *
* *
***************************************************************************/
#include <gtest/gtest.h>
#include <string>
#include <Base/UnlimitedUnsigned.h>
// NOLINTBEGIN(cppcoreguidelines-*,readability-*)
TEST(UnlimitedUnsigned, Basics)
{
// Check simple addition with carry and conversion from string
Base::UnlimitedUnsigned one(1);
auto nines = Base::UnlimitedUnsigned::fromString("999999999");
EXPECT_EQ(nines + one, Base::UnlimitedUnsigned::fromString("1000000000"));
}
TEST(UnlimitedUnsigned, ToString)
{
// Check toString on simple addition result
Base::UnlimitedUnsigned one(1);
auto nines = Base::UnlimitedUnsigned::fromString("999999999");
EXPECT_EQ((nines + one).toString(), "1000000000");
}
TEST(UnlimitedUnsigned, TestSubtraction1)
{
// Check subtraction and comparison with byte-sized number
EXPECT_EQ(Base::UnlimitedUnsigned::fromString("6842357951")
- Base::UnlimitedUnsigned::fromString("6842357948"),
3);
}
TEST(UnlimitedUnsigned, TestSubtraction2)
{
// Check subtraction and comparison
EXPECT_EQ(Base::UnlimitedUnsigned::fromString("6842357951")
- Base::UnlimitedUnsigned::fromString("6000000000"),
Base::UnlimitedUnsigned::fromString("842357951"));
}
// NOLINTEND(cppcoreguidelines-*,readability-*)