Revert "Address the poor performance of the existing unique-name generation (#17944)"

This reverts commit 83202d8ad6.

# Conflicts:
#	src/Base/Tools.cpp
#	src/Base/Tools.h
This commit is contained in:
Benjamin Nauck
2024-12-16 14:30:49 +01:00
committed by Yorik van Havre
parent 844d88fb7a
commit a2c980f7d6
28 changed files with 430 additions and 716 deletions

View File

@@ -33,214 +33,130 @@
#include "Interpreter.h"
#include "Tools.h"
void Base::UniqueNameManager::PiecewiseSparseIntegerSet::Add(uint value)
namespace Base
{
etype newSpan(value, 1);
iterator above = Spans.lower_bound(newSpan);
if (above != Spans.end() && above->first <= value) {
// The found span includes value so there is nothing to do as it is already in the set.
return;
/// Helpers for numbers that are stored as strings of decimal digits.
///
/// Both members assume their arguments consist only of the characters '0'..'9';
/// no validation is performed.
struct string_comp
{
    /// Strict-weak "less than" for two digit strings.
    ///
    /// A shorter string always orders before a longer one; strings of equal
    /// length are compared lexicographically. The call operator is const so the
    /// comparator can also be used through const objects and const references.
    ///
    /// @param s1 left-hand digit string
    /// @param s2 right-hand digit string
    /// @return true if s1 orders before s2
    bool operator()(const std::string& s1, const std::string& s2) const
    {
        if (s1.size() != s2.size()) {
            return s1.size() < s2.size();
        }
        return s1 < s2;
    }

    /// Returns the digit string that follows @p s, i.e. s + 1.
    ///
    /// Leading zeros are kept ("009" -> "010"), the result grows by one digit
    /// when the carry runs off the front ("99" -> "100"), and an empty string
    /// yields "1".
    ///
    /// @param s digit string to increment
    /// @return the incremented digit string
    static std::string increment(const std::string& s)
    {
        std::string result = s;
        int carry = 1;
        // Walk from the least significant digit; stop once the carry is absorbed.
        for (auto it = result.rbegin(); it != result.rend() && carry != 0; ++it) {
            const int digit = (*it - '0') + carry;
            *it = static_cast<char>('0' + digit % 10);
            carry = digit / 10;
        }
        if (carry > 0) {
            // Every digit overflowed (or the input was empty): prepend the carry.
            result.insert(result.begin(), static_cast<char>('0' + carry));
        }
        return result;
    }
};
class unique_name
{
public:
unique_name(std::string name, const std::vector<std::string>& names, int padding)
: base_name {std::move(name)}
, padding {padding}
{
removeDigitsFromEnd();
findHighestSuffix(names);
}
// Set below to the next span down, if any
iterator below;
if (above == Spans.begin()) {
below = Spans.end();
}
else {
below = above;
--below;
std::string get() const
{
return appendSuffix();
}
if (above != Spans.end() && below != Spans.end()
&& above->first - below->first + 1 == below->second) {
// below and above have a gap of exactly one between them, and this must be value
// so we coalesce the two spans (and the gap) into one.
newSpan = etype(below->first, below->second + above->second + 1);
Spans.erase(above);
above = Spans.erase(below);
private:
void removeDigitsFromEnd()
{
std::string::size_type pos = base_name.find_last_not_of("0123456789");
if (pos != std::string::npos && (pos + 1) < base_name.size()) {
num_suffix = base_name.substr(pos + 1);
base_name.erase(pos + 1);
}
}
if (below != Spans.end() && value - below->first == below->second) {
// value is adjacent to the end of below, so just expand below by one
newSpan = etype(below->first, below->second + 1);
above = Spans.erase(below);
void findHighestSuffix(const std::vector<std::string>& names)
{
for (const auto& name : names) {
if (name.substr(0, base_name.length()) == base_name) { // same prefix
std::string suffix(name.substr(base_name.length()));
if (!suffix.empty()) {
std::string::size_type pos = suffix.find_first_not_of("0123456789");
if (pos == std::string::npos) {
num_suffix = std::max<std::string>(num_suffix, suffix, Base::string_comp());
}
}
}
}
}
else if (above != Spans.end() && above->first - value == 1) {
// value is adjacent to the start of above, so just expand above down by one
newSpan = etype(above->first - 1, above->second + 1);
above = Spans.erase(above);
std::string appendSuffix() const
{
std::stringstream str;
str << base_name;
if (padding > 0) {
str.fill('0');
str.width(padding);
}
str << Base::string_comp::increment(num_suffix);
return str.str();
}
// else value is not adjacent to any existing span, so just make a new span for it
Spans.insert(above, newSpan);
}
void Base::UniqueNameManager::PiecewiseSparseIntegerSet::Remove(uint value)
private:
std::string num_suffix;
std::string base_name;
int padding;
};
} // namespace Base
std::string
Base::Tools::getUniqueName(const std::string& name, const std::vector<std::string>& names, int pad)
{
etype newSpan(value, 1);
iterator at = Spans.lower_bound(newSpan);
if (at == Spans.end() || at->first > value) {
// The found span does not include value so there is nothing to do, as it is already not in
// the set.
return;
if (names.empty()) {
return name;
}
if (at->second == 1) {
// value is the only in this span, just remove the span
Spans.erase(at);
}
else if (at->first == value) {
// value is the first in this span, trim the lower end
etype replacement(at->first + 1, at->second - 1);
Spans.insert(Spans.erase(at), replacement);
}
else if (value - at->first == at->second - 1) {
// value is the last in this span, trim the upper end
etype replacement(at->first, at->second - 1);
Spans.insert(Spans.erase(at), replacement);
}
else {
// value is in the middle of the span, so we must split it.
etype firstReplacement(at->first, value - at->first);
etype secondReplacement(value + 1, at->second - ((value + 1) - at->first));
// Because erase returns the iterator after the erased element, and insert returns the
// iterator for the inserted item, we want to insert secondReplacement first.
Spans.insert(Spans.insert(Spans.erase(at), secondReplacement), firstReplacement);
}
}
bool Base::UniqueNameManager::PiecewiseSparseIntegerSet::Contains(uint value) const
{
iterator at = Spans.lower_bound(etype(value, 1));
return at != Spans.end() && at->first <= value;
Base::unique_name unique(name, names, pad);
return unique.get();
}
std::tuple<uint, uint> Base::UniqueNameManager::decomposeName(const std::string& name,
std::string& baseNameOut,
std::string& nameSuffixOut) const
std::string Base::Tools::addNumber(const std::string& name, unsigned int num, int d)
{
auto suffixStart = std::make_reverse_iterator(GetNameSuffixStartPosition(name));
nameSuffixOut = name.substr(name.crend() - suffixStart);
auto digitsStart = std::find_if_not(suffixStart, name.crend(), [](char c) {
return std::isdigit(c);
});
baseNameOut = name.substr(0, name.crend() - digitsStart);
uint digitCount = digitsStart - suffixStart;
if (digitCount == 0) {
// No digits in name
return std::tuple<uint, uint> {0, 0};
std::stringstream str;
str << name;
if (d > 0) {
str.fill('0');
str.width(d);
}
else {
return std::tuple<uint, uint> {
digitCount,
std::stoul(name.substr(name.crend() - digitsStart, digitCount))};
}
}
void Base::UniqueNameManager::addExactName(const std::string& name)
{
std::string baseName;
std::string nameSuffix;
uint digitCount;
uint digitsValue;
std::tie(digitCount, digitsValue) = decomposeName(name, baseName, nameSuffix);
baseName += nameSuffix;
auto baseNameEntry = UniqueSeeds.find(baseName);
if (baseNameEntry == UniqueSeeds.end()) {
// First use of baseName
baseNameEntry =
UniqueSeeds.emplace(baseName, std::vector<PiecewiseSparseIntegerSet>()).first;
}
if (digitCount >= baseNameEntry->second.size()) {
// First use of this digitCount
baseNameEntry->second.resize(digitCount + 1);
}
PiecewiseSparseIntegerSet& baseNameAndDigitCountEntry = baseNameEntry->second[digitCount];
// Name should not already be there
assert(!baseNameAndDigitCountEntry.Contains(digitsValue));
baseNameAndDigitCountEntry.Add(digitsValue);
}
std::string Base::UniqueNameManager::makeUniqueName(const std::string& modelName,
std::size_t minDigits) const
{
std::string namePrefix;
std::string nameSuffix;
decomposeName(modelName, namePrefix, nameSuffix);
std::string baseName = namePrefix + nameSuffix;
auto baseNameEntry = UniqueSeeds.find(baseName);
if (baseNameEntry == UniqueSeeds.end()) {
// First use of baseName, just return it with no unique digits
return baseName;
}
// We don't care about the digit count of the suggested name, we always use at least the most
// digits ever used before.
std::size_t digitCount = baseNameEntry->second.size() - 1;
uint digitsValue;
if (digitCount < minDigits) {
// Caller is asking for more digits than we have in any registered name.
// We start the longer digit string at 000...0001 even though we might have shorter strings
// with larger numeric values.
digitCount = minDigits;
digitsValue = 1;
}
else {
digitsValue = baseNameEntry->second[digitCount].Next();
}
std::string digits = std::to_string(digitsValue);
if (digitCount > digits.size()) {
namePrefix += std::string(digitCount - digits.size(), '0');
}
return namePrefix + digits + nameSuffix;
str << num;
return str.str();
}
/// Unregisters @p name from the manager; does nothing if no entry exists for
/// its key or digit count.
///
/// After removing the value, trailing integer sets that report no content
/// (Any() is false) are dropped; if none remain, the whole UniqueSeeds entry
/// for the key is erased.
void Base::UniqueNameManager::removeExactName(const std::string& name)
{
    std::string seedKey;
    std::string trailingPart;
    uint numDigits;
    uint number;
    std::tie(numDigits, number) = decomposeName(name, seedKey, trailingPart);
    seedKey += trailingPart;

    auto seed = UniqueSeeds.find(seedKey);
    if (seed == UniqueSeeds.end()) {
        // Nothing is registered under this key.
        return;
    }

    auto& setsByDigitCount = seed->second;
    if (setsByDigitCount.size() <= numDigits) {
        // No name with this many digits was ever registered for the key.
        return;
    }

    setsByDigitCount[numDigits].Remove(number);

    // The set just modified, and any sets before it, may now be empty:
    // trim every empty set from the back of the table.
    while (!setsByDigitCount.empty() && !setsByDigitCount.back().Any()) {
        setsByDigitCount.pop_back();
    }

    if (setsByDigitCount.empty()) {
        // No values remain for this key, so forget the key entirely.
        UniqueSeeds.erase(seedKey);
    }
}
/// Reports whether @p name is currently registered.
///
/// @return true only if an entry exists for the name's key, the entry has a
///         set for the name's digit count, and that set contains the numeric
///         value of the digits.
bool Base::UniqueNameManager::containsName(const std::string& name) const
{
    std::string seedKey;
    std::string trailingPart;
    uint numDigits;
    uint number;
    std::tie(numDigits, number) = decomposeName(name, seedKey, trailingPart);
    seedKey += trailingPart;

    const auto seed = UniqueSeeds.find(seedKey);
    if (seed == UniqueSeeds.end()) {
        // Key was never registered.
        return false;
    }

    const auto& setsByDigitCount = seed->second;
    // A digit count beyond the table means no such name was registered.
    return numDigits < setsByDigitCount.size()
        && setsByDigitCount[numDigits].Contains(number);
}
std::string Base::Tools::getIdentifier(const std::string& name)
{
if (name.empty()) {