Revert "Address the poor performance of the existing unique-name generation (#17944)"

This reverts commit 83202d8ad6.

# Conflicts:
#	src/Base/Tools.cpp
#	src/Base/Tools.h
This commit is contained in:
Benjamin Nauck
2024-12-16 14:30:49 +01:00
committed by Yorik van Havre
parent 844d88fb7a
commit a2c980f7d6
28 changed files with 430 additions and 716 deletions

View File

@@ -463,13 +463,14 @@ const char* Base::XMLReader::addFile(const char* Name, Base::Persistence* Object
temp.Object = Object;
FileList.push_back(temp);
FileNames.push_back(temp.FileName);
return Name;
}
bool Base::XMLReader::hasFilenames() const
const std::vector<std::string>& Base::XMLReader::getFilenames() const
{
return FileList.size() > 0;
return FileNames;
}
bool Base::XMLReader::hasReadFailed(const std::string& filename) const

View File

@@ -251,8 +251,8 @@ public:
const char* addFile(const char* Name, Base::Persistence* Object);
/// process the requested file writes
void readFiles(zipios::ZipInputStream& zipstream) const;
/// Returns whether reader has any registered filenames
bool hasFilenames() const;
/// get all registered file names
const std::vector<std::string>& getFilenames() const;
/// returns true if reading the file \a filename has failed
bool hasReadFailed(const std::string& filename) const;
bool isRegistered(Base::Persistence* Object) const;
@@ -364,6 +364,7 @@ public:
std::vector<FileEntry> FileList;
private:
std::vector<std::string> FileNames;
mutable std::vector<std::string> FailedFiles;
std::bitset<32> StatusBits;

View File

@@ -33,214 +33,130 @@
#include "Interpreter.h"
#include "Tools.h"
void Base::UniqueNameManager::PiecewiseSparseIntegerSet::Add(uint value)
namespace Base
{
etype newSpan(value, 1);
iterator above = Spans.lower_bound(newSpan);
if (above != Spans.end() && above->first <= value) {
// The found span includes value so there is nothing to do as it is already in the set.
return;
/// Helpers for non-negative decimal numbers that are stored as strings.
///
/// Both members assume their arguments consist of the characters '0'..'9' only;
/// no validation is performed.
struct string_comp
{
    /// Strict "less than" for two digit strings.
    ///
    /// A shorter string is always considered smaller, strings of equal length are
    /// compared lexicographically. Leading zeros are therefore NOT normalised:
    /// "9" < "05" according to this ordering.
    ///
    /// The call operator is const so the comparator can be used through const
    /// objects and as the ordering of standard containers/algorithms.
    bool operator()(const std::string& s1, const std::string& s2) const
    {
        if (s1.size() != s2.size()) {
            return s1.size() < s2.size();
        }
        return s1 < s2;
    }

    /// Returns the digit string \a s incremented by one.
    ///
    /// The width of the input is preserved unless the carry runs out of the most
    /// significant digit ("099" -> "100", "999" -> "1000"). An empty input yields "1".
    static std::string increment(const std::string& s)
    {
        std::string n = s;
        int carry = 1;
        // Walk from the least significant digit and stop as soon as the carry is absorbed.
        for (auto it = n.rbegin(); it != n.rend() && carry != 0; ++it) {
            const int d = (*it - '0') + carry;
            *it = static_cast<char>((d % 10) + '0');
            carry = d / 10;
        }
        if (carry > 0) {
            // The carry left the most significant digit: the number grows by one digit.
            n.insert(n.begin(), static_cast<char>(carry + '0'));
        }
        return n;
    }
};
class unique_name
{
public:
unique_name(std::string name, const std::vector<std::string>& names, int padding)
: base_name {std::move(name)}
, padding {padding}
{
removeDigitsFromEnd();
findHighestSuffix(names);
}
// Set below to the next span down, if any
iterator below;
if (above == Spans.begin()) {
below = Spans.end();
}
else {
below = above;
--below;
/// Returns the base name followed by the next free numeric suffix.
std::string get() const
{
    std::string result = appendSuffix();
    return result;
}
if (above != Spans.end() && below != Spans.end()
&& above->first - below->first + 1 == below->second) {
// below and above have a gap of exactly one between them, and this must be value
// so we coalesce the two spans (and the gap) into one.
newSpan = etype(below->first, below->second + above->second + 1);
Spans.erase(above);
above = Spans.erase(below);
private:
/// Moves a trailing run of digits from base_name into num_suffix.
///
/// Nothing happens when base_name has no trailing digits or consists of
/// digits only (in the latter case there is no non-digit to anchor on).
void removeDigitsFromEnd()
{
    const std::string::size_type lastNonDigit = base_name.find_last_not_of("0123456789");
    if (lastNonDigit == std::string::npos) {
        return;
    }
    const std::string::size_type digitsBegin = lastNonDigit + 1;
    if (digitsBegin >= base_name.size()) {
        return;
    }
    num_suffix = base_name.substr(digitsBegin);
    base_name.erase(digitsBegin);
}
if (below != Spans.end() && value - below->first == below->second) {
// value is adjacent to the end of below, so just expand below by one
newSpan = etype(below->first, below->second + 1);
above = Spans.erase(below);
void findHighestSuffix(const std::vector<std::string>& names)
{
for (const auto& name : names) {
if (name.substr(0, base_name.length()) == base_name) { // same prefix
std::string suffix(name.substr(base_name.length()));
if (!suffix.empty()) {
std::string::size_type pos = suffix.find_first_not_of("0123456789");
if (pos == std::string::npos) {
num_suffix = std::max<std::string>(num_suffix, suffix, Base::string_comp());
}
}
}
}
}
else if (above != Spans.end() && above->first - value == 1) {
// value is adjacent to the start of above, so just expand above down by one
newSpan = etype(above->first - 1, above->second + 1);
above = Spans.erase(above);
/// Builds the final name: base_name followed by num_suffix incremented by one.
///
/// When padding is positive the numeric part is left-filled with '0' up to
/// that width; a longer number is never truncated.
std::string appendSuffix() const
{
    std::string digits = Base::string_comp::increment(num_suffix);
    if (padding > 0) {
        const auto wanted = static_cast<std::string::size_type>(padding);
        if (digits.size() < wanted) {
            digits.insert(0, wanted - digits.size(), '0');
        }
    }
    return base_name + digits;
}
// else value is not adjacent to any existing span, so just make a new span for it
Spans.insert(above, newSpan);
}
void Base::UniqueNameManager::PiecewiseSparseIntegerSet::Remove(uint value)
private:
std::string num_suffix;
std::string base_name;
int padding;
};
} // namespace Base
std::string
Base::Tools::getUniqueName(const std::string& name, const std::vector<std::string>& names, int pad)
{
etype newSpan(value, 1);
iterator at = Spans.lower_bound(newSpan);
if (at == Spans.end() || at->first > value) {
// The found span does not include value so there is nothing to do, as it is already not in
// the set.
return;
if (names.empty()) {
return name;
}
if (at->second == 1) {
// value is the only in this span, just remove the span
Spans.erase(at);
}
else if (at->first == value) {
// value is the first in this span, trim the lower end
etype replacement(at->first + 1, at->second - 1);
Spans.insert(Spans.erase(at), replacement);
}
else if (value - at->first == at->second - 1) {
// value is the last in this span, trim the upper end
etype replacement(at->first, at->second - 1);
Spans.insert(Spans.erase(at), replacement);
}
else {
// value is in the middle of the span, so we must split it.
etype firstReplacement(at->first, value - at->first);
etype secondReplacement(value + 1, at->second - ((value + 1) - at->first));
// Because erase returns the iterator after the erased element, and insert returns the
// iterator for the inserted item, we want to insert secondReplacement first.
Spans.insert(Spans.insert(Spans.erase(at), secondReplacement), firstReplacement);
}
}
bool Base::UniqueNameManager::PiecewiseSparseIntegerSet::Contains(uint value) const
{
iterator at = Spans.lower_bound(etype(value, 1));
return at != Spans.end() && at->first <= value;
Base::unique_name unique(name, names, pad);
return unique.get();
}
std::tuple<uint, uint> Base::UniqueNameManager::decomposeName(const std::string& name,
std::string& baseNameOut,
std::string& nameSuffixOut) const
std::string Base::Tools::addNumber(const std::string& name, unsigned int num, int d)
{
auto suffixStart = std::make_reverse_iterator(GetNameSuffixStartPosition(name));
nameSuffixOut = name.substr(name.crend() - suffixStart);
auto digitsStart = std::find_if_not(suffixStart, name.crend(), [](char c) {
return std::isdigit(c);
});
baseNameOut = name.substr(0, name.crend() - digitsStart);
uint digitCount = digitsStart - suffixStart;
if (digitCount == 0) {
// No digits in name
return std::tuple<uint, uint> {0, 0};
std::stringstream str;
str << name;
if (d > 0) {
str.fill('0');
str.width(d);
}
else {
return std::tuple<uint, uint> {
digitCount,
std::stoul(name.substr(name.crend() - digitsStart, digitCount))};
}
}
void Base::UniqueNameManager::addExactName(const std::string& name)
{
std::string baseName;
std::string nameSuffix;
uint digitCount;
uint digitsValue;
std::tie(digitCount, digitsValue) = decomposeName(name, baseName, nameSuffix);
baseName += nameSuffix;
auto baseNameEntry = UniqueSeeds.find(baseName);
if (baseNameEntry == UniqueSeeds.end()) {
// First use of baseName
baseNameEntry =
UniqueSeeds.emplace(baseName, std::vector<PiecewiseSparseIntegerSet>()).first;
}
if (digitCount >= baseNameEntry->second.size()) {
// First use of this digitCount
baseNameEntry->second.resize(digitCount + 1);
}
PiecewiseSparseIntegerSet& baseNameAndDigitCountEntry = baseNameEntry->second[digitCount];
// Name should not already be there
assert(!baseNameAndDigitCountEntry.Contains(digitsValue));
baseNameAndDigitCountEntry.Add(digitsValue);
}
std::string Base::UniqueNameManager::makeUniqueName(const std::string& modelName,
std::size_t minDigits) const
{
std::string namePrefix;
std::string nameSuffix;
decomposeName(modelName, namePrefix, nameSuffix);
std::string baseName = namePrefix + nameSuffix;
auto baseNameEntry = UniqueSeeds.find(baseName);
if (baseNameEntry == UniqueSeeds.end()) {
// First use of baseName, just return it with no unique digits
return baseName;
}
// We don't care about the digit count of the suggested name, we always use at least the most
// digits ever used before.
std::size_t digitCount = baseNameEntry->second.size() - 1;
uint digitsValue;
if (digitCount < minDigits) {
// Caller is asking for more digits than we have in any registered name.
// We start the longer digit string at 000...0001 even though we might have shorter strings
// with larger numeric values.
digitCount = minDigits;
digitsValue = 1;
}
else {
digitsValue = baseNameEntry->second[digitCount].Next();
}
std::string digits = std::to_string(digitsValue);
if (digitCount > digits.size()) {
namePrefix += std::string(digitCount - digits.size(), '0');
}
return namePrefix + digits + nameSuffix;
str << num;
return str.str();
}
void Base::UniqueNameManager::removeExactName(const std::string& name)
{
std::string baseName;
std::string nameSuffix;
uint digitCount;
uint digitsValue;
std::tie(digitCount, digitsValue) = decomposeName(name, baseName, nameSuffix);
baseName += nameSuffix;
auto baseNameEntry = UniqueSeeds.find(baseName);
if (baseNameEntry == UniqueSeeds.end()) {
// name must not be registered, so nothing to do.
return;
}
auto& digitValueSets = baseNameEntry->second;
if (digitCount >= digitValueSets.size()) {
// First use of this digitCount, name must not be registered, so nothing to do.
return;
}
digitValueSets[digitCount].Remove(digitsValue);
// an element of digitValueSets may now be newly empty and so may other elements below it
// Prune off all such trailing empty entries.
auto lastNonemptyEntry =
std::find_if(digitValueSets.crbegin(), digitValueSets.crend(), [](auto& it) {
return it.Any();
});
if (lastNonemptyEntry == digitValueSets.crend()) {
// All entries are empty, so the entire baseName can be forgotten.
UniqueSeeds.erase(baseName);
}
else {
digitValueSets.resize(digitValueSets.crend() - lastNonemptyEntry);
}
}
bool Base::UniqueNameManager::containsName(const std::string& name) const
{
std::string baseName;
std::string nameSuffix;
uint digitCount;
uint digitsValue;
std::tie(digitCount, digitsValue) = decomposeName(name, baseName, nameSuffix);
baseName += nameSuffix;
auto baseNameEntry = UniqueSeeds.find(baseName);
if (baseNameEntry == UniqueSeeds.end()) {
// base name is not registered
return false;
}
if (digitCount >= baseNameEntry->second.size()) {
// First use of this digitCount, name must not be registered, so not in collection
return false;
}
return baseNameEntry->second[digitCount].Contains(digitsValue);
}
std::string Base::Tools::getIdentifier(const std::string& name)
{
if (name.empty()) {

View File

@@ -33,7 +33,6 @@
#include <iostream>
#include <vector>
#include <string>
#include <set>
#include <boost_signals2.hpp>
#include <QString>
@@ -265,100 +264,11 @@ public:
// ----------------------------------------------------------------------------
class BaseExport UniqueNameManager
{
protected:
// This method returns the position of the start of the suffix (or name.cend() if no
// suffix). It must return the same suffix lentgh (name.size() - returnValue) for both
// unique names (one containing digits) and the corresponding base name (with no digits).
virtual std::string::const_iterator GetNameSuffixStartPosition(const std::string& name) const
{
return name.cend();
}
private:
class PiecewiseSparseIntegerSet
{
public:
PiecewiseSparseIntegerSet()
{}
private:
// Each pair being <lowest, count> represents the span of integers from lowest to
// (lowest+count-1) inclusive
using etype = std::pair<uint, uint>;
// This span comparer class is analogous to std::less and treats overlapping spans as being
// neither greater nor less than each other
class comparer
{
public:
bool operator()(const etype& lhs, const etype& rhs) const
{
// The equality case here is when lhs is below and directly adjacent to rhs.
return rhs.first - lhs.first >= lhs.second;
}
};
// Spans is the set of spans. Adjacent spans are coalesced so there are always gaps between
// the entries.
std::set<etype, comparer> Spans;
using iterator = typename std::set<etype, comparer>::iterator;
using const_iterator = typename std::set<etype, comparer>::const_iterator;
public:
void Add(uint value);
void Remove(uint value);
bool Contains(uint value) const;
bool Any() const
{
return Spans.size() != 0;
}
void Clear()
{
Spans.clear();
}
uint Next() const
{
if (Spans.size() == 0) {
return 0;
}
iterator last = Spans.end();
--last;
return last->first + last->second;
}
};
// Keyed as UniqueSeeds[baseName][digitCount][digitValue] iff that seed is taken.
// We need the double-indexing so that Name01 and Name001 can both be indexed, although we only
// ever allocate off the longest for each name i.e. UniqueSeeds[baseName].size()-1 digits.
std::map<std::string, std::vector<PiecewiseSparseIntegerSet>> UniqueSeeds;
public:
std::tuple<uint, uint> decomposeName(const std::string& name,
std::string& baseNameOut,
std::string& nameSuffixOut) const;
UniqueNameManager()
{}
// Register a name in the collection. It is an error (detected only by assertions) to register a
// name more than once. The effect if undetected is that the second registration will have no
// effect
void addExactName(const std::string& name);
std::string makeUniqueName(const std::string& modelName, std::size_t minDigits = 0) const;
// Remove a registered name so it can be generated again.
// Nothing happens if you try to remove a non-registered name.
void removeExactName(const std::string& name);
bool containsName(const std::string& name) const;
void clear()
{
UniqueSeeds.clear();
}
};
struct BaseExport Tools
{
static std::string
getUniqueName(const std::string&, const std::vector<std::string>&, int d = 0);
static std::string addNumber(const std::string&, unsigned int, int d = 0);
static std::string getIdentifier(const std::string&);
static std::wstring widen(const std::string& str);
static std::string narrow(const std::wstring& str);

View File

@@ -247,19 +247,56 @@ std::string Writer::addFile(const char* Name, const Base::Persistence* Object)
assert(!isForceXML());
FileEntry temp;
temp.FileName = Name ? Name : "";
if (FileNameManager.containsName(temp.FileName)) {
temp.FileName = FileNameManager.makeUniqueName(temp.FileName);
}
temp.FileName = getUniqueFileName(Name);
temp.Object = Object;
FileList.push_back(temp);
FileNameManager.addExactName(temp.FileName);
FileNames.push_back(temp.FileName);
// return the unique file name
return temp.FileName;
}
std::string Writer::getUniqueFileName(const char* Name)
{
// name in use?
std::string CleanName = (Name ? Name : "");
std::vector<std::string>::const_iterator pos;
pos = find(FileNames.begin(), FileNames.end(), CleanName);
if (pos == FileNames.end()) {
// if not, name is OK
return CleanName;
}
std::vector<std::string> names;
names.reserve(FileNames.size());
FileInfo fi(CleanName);
CleanName = fi.fileNamePure();
std::string ext = fi.extension();
for (pos = FileNames.begin(); pos != FileNames.end(); ++pos) {
fi.setFile(*pos);
std::string FileName = fi.fileNamePure();
if (fi.extension() == ext) {
names.push_back(FileName);
}
}
std::stringstream str;
str << Base::Tools::getUniqueName(CleanName, names);
if (!ext.empty()) {
str << "." << ext;
}
return str.str();
}
const std::vector<std::string>& Writer::getFilenames() const
{
return FileNames;
}
void Writer::incInd()
{
if (indent < 1020) {

View File

@@ -39,8 +39,6 @@
#include <zipios++/zipoutputstream.h>
#include <zipios++/meta-iostreams.h>
#include <Base/Tools.h>
#include "FileInfo.h"
@@ -58,24 +56,6 @@ class Persistence;
*/
class BaseExport Writer
{
private:
// This overrides UniqueNameManager's suffix-locating function so thet the last '.' and
// everything after it is considered suffix.
class UniqueFileNameManager: public UniqueNameManager
{
protected:
virtual std::string::const_iterator
GetNameSuffixStartPosition(const std::string& name) const override
{
// This is an awkward way to do this, because the FileInfo class only yields pieces of
// the path, not delimiter positions. We can't just use fi.extension().size() because
// both "xyz" and "xyz." would yield three; we need the length of the extension
// *including its delimiter* so we use the length difference between the fileName and
// fileNamePure.
FileInfo fi(name);
return name.end() - (fi.fileName().size() - fi.fileNamePure().size());
}
};
public:
Writer();
@@ -104,6 +84,8 @@ public:
std::string addFile(const char* Name, const Base::Persistence* Object);
/// process the requested file storing
virtual void writeFiles() = 0;
/// get all registered file names
const std::vector<std::string>& getFilenames() const;
/// Set mode
void setMode(const std::string& mode);
/// Set modes
@@ -169,13 +151,14 @@ public:
std::string ObjectName;
protected:
std::string getUniqueFileName(const char* Name);
/// One registered file: the name it is stored under and the object it belongs to.
struct FileEntry
{
    std::string FileName;
    /// Non-owning pointer to the associated object. It is initialised to nullptr so
    /// that a default-constructed entry never carries an indeterminate pointer.
    const Base::Persistence* Object {nullptr};
};
std::vector<FileEntry> FileList;
UniqueFileNameManager FileNameManager;
std::vector<std::string> FileNames;
std::vector<std::string> Errors;
std::set<std::string> Modes;