Revert "Address the poor performance of the existing unique-name generation (#17944)"

This reverts commit 83202d8ad6.

# Conflicts:
#   src/Base/Tools.cpp
#   src/Base/Tools.h
2024-12-16 14:30:49 +01:00
parent 844d88fb7a
commit a2c980f7d6
28 changed files with 430 additions and 716 deletions
--- a/src/Base/Reader.cpp
+++ b/src/Base/Reader.cpp
@@ -463,13 +463,14 @@ const char* Base::XMLReader::addFile(const char* Name, Base::Persistence* Object
    temp.Object = Object;

    FileList.push_back(temp);
+    FileNames.push_back(temp.FileName);

    return Name;
 }

-bool Base::XMLReader::hasFilenames() const
+const std::vector<std::string>& Base::XMLReader::getFilenames() const
 {
-    return FileList.size() > 0;
+    return FileNames;
 }

 bool Base::XMLReader::hasReadFailed(const std::string& filename) const
--- a/src/Base/Reader.h
+++ b/src/Base/Reader.h
@@ -251,8 +251,8 @@ public:
    const char* addFile(const char* Name, Base::Persistence* Object);
    /// process the requested file writes
    void readFiles(zipios::ZipInputStream& zipstream) const;
-    /// Returns whether reader has any registered filenames
-    bool hasFilenames() const;
+    /// get all registered file names
+    const std::vector<std::string>& getFilenames() const;
    /// returns true if reading the file \a filename has failed
    bool hasReadFailed(const std::string& filename) const;
    bool isRegistered(Base::Persistence* Object) const;
@@ -364,6 +364,7 @@ public:
    std::vector<FileEntry> FileList;

 private:
+    std::vector<std::string> FileNames;
    mutable std::vector<std::string> FailedFiles;

    std::bitset<32> StatusBits;
--- a/src/Base/Tools.cpp
+++ b/src/Base/Tools.cpp
@@ -33,214 +33,130 @@
 #include "Interpreter.h"
 #include "Tools.h"

-void Base::UniqueNameManager::PiecewiseSparseIntegerSet::Add(uint value)
+namespace Base
 {
-    etype newSpan(value, 1);
-    iterator above = Spans.lower_bound(newSpan);
-    if (above != Spans.end() && above->first <= value) {
-        // The found span includes value so there is nothing to do as it is already in the set.
-        return;
+struct string_comp
+{
+    // s1 and s2 must be numbers represented as string
+    bool operator()(const std::string& s1, const std::string& s2)
+    {
+        if (s1.size() < s2.size()) {
+            return true;
+        }
+        if (s1.size() > s2.size()) {
+            return false;
+        }
+
+        return s1 < s2;
+    }
+    static std::string increment(const std::string& s)
+    {
+        std::string n = s;
+        int addcarry = 1;
+        for (std::string::reverse_iterator it = n.rbegin(); it != n.rend(); ++it) {
+            if (addcarry == 0) {
+                break;
+            }
+            int d = *it - 48;
+            d = d + addcarry;
+            *it = ((d % 10) + 48);
+            addcarry = d / 10;
+        }
+        if (addcarry > 0) {
+            std::string b;
+            b.resize(1);
+            b[0] = addcarry + 48;
+            n = b + n;
+        }
+
+        return n;
+    }
+};
+
+class unique_name
+{
+public:
+    unique_name(std::string name, const std::vector<std::string>& names, int padding)
+        : base_name {std::move(name)}
+        , padding {padding}
+    {
+        removeDigitsFromEnd();
+        findHighestSuffix(names);
    }

-    // Set below to the next span down, if any
-    iterator below;
-    if (above == Spans.begin()) {
-        below = Spans.end();
-    }
-    else {
-        below = above;
-        --below;
+    std::string get() const
+    {
+        return appendSuffix();
    }

-    if (above != Spans.end() && below != Spans.end()
-        && above->first - below->first + 1 == below->second) {
-        // below and above have a gap of exactly one between them, and this must be value
-        // so we coalesce the two spans (and the gap) into one.
-        newSpan = etype(below->first, below->second + above->second + 1);
-        Spans.erase(above);
-        above = Spans.erase(below);
+private:
+    void removeDigitsFromEnd()
+    {
+        std::string::size_type pos = base_name.find_last_not_of("0123456789");
+        if (pos != std::string::npos && (pos + 1) < base_name.size()) {
+            num_suffix = base_name.substr(pos + 1);
+            base_name.erase(pos + 1);
+        }
    }
-    if (below != Spans.end() && value - below->first == below->second) {
-        // value is adjacent to the end of below, so just expand below by one
-        newSpan = etype(below->first, below->second + 1);
-        above = Spans.erase(below);
+
+    void findHighestSuffix(const std::vector<std::string>& names)
+    {
+        for (const auto& name : names) {
+            if (name.substr(0, base_name.length()) == base_name) {  // same prefix
+                std::string suffix(name.substr(base_name.length()));
+                if (!suffix.empty()) {
+                    std::string::size_type pos = suffix.find_first_not_of("0123456789");
+                    if (pos == std::string::npos) {
+                        num_suffix = std::max<std::string>(num_suffix, suffix, Base::string_comp());
+                    }
+                }
+            }
+        }
    }
-    else if (above != Spans.end() && above->first - value == 1) {
-        // value is adjacent to the start of above, so juse expand above down by one
-        newSpan = etype(above->first - 1, above->second + 1);
-        above = Spans.erase(above);
+
+    std::string appendSuffix() const
+    {
+        std::stringstream str;
+        str << base_name;
+        if (padding > 0) {
+            str.fill('0');
+            str.width(padding);
+        }
+        str << Base::string_comp::increment(num_suffix);
+        return str.str();
    }
-    // else  value is not adjacent to any existing span, so just make anew span for it
-    Spans.insert(above, newSpan);
-}
-void Base::UniqueNameManager::PiecewiseSparseIntegerSet::Remove(uint value)
+
+private:
+    std::string num_suffix;
+    std::string base_name;
+    int padding;
+};
+
+}  // namespace Base
+
+std::string
+Base::Tools::getUniqueName(const std::string& name, const std::vector<std::string>& names, int pad)
 {
-    etype newSpan(value, 1);
-    iterator at = Spans.lower_bound(newSpan);
-    if (at == Spans.end() || at->first > value) {
-        // The found span does not include value so there is nothing to do, as it is already not in
-        // the set.
-        return;
+    if (names.empty()) {
+        return name;
    }
-    if (at->second == 1) {
-        // value is the only in this span, just remove the span
-        Spans.erase(at);
-    }
-    else if (at->first == value) {
-        // value is the first in this span, trim the lower end
-        etype replacement(at->first + 1, at->second - 1);
-        Spans.insert(Spans.erase(at), replacement);
-    }
-    else if (value - at->first == at->second - 1) {
-        // value is the last in this span, trim the upper end
-        etype replacement(at->first, at->second - 1);
-        Spans.insert(Spans.erase(at), replacement);
-    }
-    else {
-        // value is in the moddle of the span, so we must split it.
-        etype firstReplacement(at->first, value - at->first);
-        etype secondReplacement(value + 1, at->second - ((value + 1) - at->first));
-        // Because erase returns the iterator after the erased element, and insert returns the
-        // iterator for the inserted item, we want to insert secondReplacement first.
-        Spans.insert(Spans.insert(Spans.erase(at), secondReplacement), firstReplacement);
-    }
-}
-bool Base::UniqueNameManager::PiecewiseSparseIntegerSet::Contains(uint value) const
-{
-    iterator at = Spans.lower_bound(etype(value, 1));
-    return at != Spans.end() && at->first <= value;
+
+    Base::unique_name unique(name, names, pad);
+    return unique.get();
 }

-std::tuple<uint, uint> Base::UniqueNameManager::decomposeName(const std::string& name,
-                                                              std::string& baseNameOut,
-                                                              std::string& nameSuffixOut) const
+std::string Base::Tools::addNumber(const std::string& name, unsigned int num, int d)
 {
-    auto suffixStart = std::make_reverse_iterator(GetNameSuffixStartPosition(name));
-    nameSuffixOut = name.substr(name.crend() - suffixStart);
-    auto digitsStart = std::find_if_not(suffixStart, name.crend(), [](char c) {
-        return std::isdigit(c);
-    });
-    baseNameOut = name.substr(0, name.crend() - digitsStart);
-    uint digitCount = digitsStart - suffixStart;
-    if (digitCount == 0) {
-        // No digits in name
-        return std::tuple<uint, uint> {0, 0};
+    std::stringstream str;
+    str << name;
+    if (d > 0) {
+        str.fill('0');
+        str.width(d);
    }
-    else {
-        return std::tuple<uint, uint> {
-            digitCount,
-            std::stoul(name.substr(name.crend() - digitsStart, digitCount))};
-    }
-}
-void Base::UniqueNameManager::addExactName(const std::string& name)
-{
-    std::string baseName;
-    std::string nameSuffix;
-    uint digitCount;
-    uint digitsValue;
-    std::tie(digitCount, digitsValue) = decomposeName(name, baseName, nameSuffix);
-    baseName += nameSuffix;
-    auto baseNameEntry = UniqueSeeds.find(baseName);
-    if (baseNameEntry == UniqueSeeds.end()) {
-        // First use of baseName
-        baseNameEntry =
-            UniqueSeeds.emplace(baseName, std::vector<PiecewiseSparseIntegerSet>()).first;
-    }
-    if (digitCount >= baseNameEntry->second.size()) {
-        // First use of this digitCount
-        baseNameEntry->second.resize(digitCount + 1);
-    }
-    PiecewiseSparseIntegerSet& baseNameAndDigitCountEntry = baseNameEntry->second[digitCount];
-    // Name should not already be there
-    assert(!baseNameAndDigitCountEntry.Contains(digitsValue));
-    baseNameAndDigitCountEntry.Add(digitsValue);
-}
-std::string Base::UniqueNameManager::makeUniqueName(const std::string& modelName,
-                                                    std::size_t minDigits) const
-{
-    std::string namePrefix;
-    std::string nameSuffix;
-    decomposeName(modelName, namePrefix, nameSuffix);
-    std::string baseName = namePrefix + nameSuffix;
-    auto baseNameEntry = UniqueSeeds.find(baseName);
-    if (baseNameEntry == UniqueSeeds.end()) {
-        // First use of baseName, just return it with no unique digits
-        return baseName;
-    }
-    // We don't care about the digit count of the suggested name, we always use at least the most
-    // digits ever used before.
-    std::size_t digitCount = baseNameEntry->second.size() - 1;
-    uint digitsValue;
-    if (digitCount < minDigits) {
-        // Caller is asking for more digits than we have in any registered name.
-        // We start the longer digit string at 000...0001 even though we might have shorter strings
-        // with larger numeric values.
-        digitCount = minDigits;
-        digitsValue = 1;
-    }
-    else {
-        digitsValue = baseNameEntry->second[digitCount].Next();
-    }
-    std::string digits = std::to_string(digitsValue);
-    if (digitCount > digits.size()) {
-        namePrefix += std::string(digitCount - digits.size(), '0');
-    }
-    return namePrefix + digits + nameSuffix;
+    str << num;
+    return str.str();
 }

-void Base::UniqueNameManager::removeExactName(const std::string& name)
-{
-    std::string baseName;
-    std::string nameSuffix;
-    uint digitCount;
-    uint digitsValue;
-    std::tie(digitCount, digitsValue) = decomposeName(name, baseName, nameSuffix);
-    baseName += nameSuffix;
-    auto baseNameEntry = UniqueSeeds.find(baseName);
-    if (baseNameEntry == UniqueSeeds.end()) {
-        // name must not be registered, so nothing to do.
-        return;
-    }
-    auto& digitValueSets = baseNameEntry->second;
-    if (digitCount >= digitValueSets.size()) {
-        // First use of this digitCount, name must not be registered, so nothing to do.
-        return;
-    }
-    digitValueSets[digitCount].Remove(digitsValue);
-    // an element of digitValueSets may now be newly empty and so may other elements below it
-    // Prune off all such trailing empty entries.
-    auto lastNonemptyEntry =
-        std::find_if(digitValueSets.crbegin(), digitValueSets.crend(), [](auto& it) {
-            return it.Any();
-        });
-    if (lastNonemptyEntry == digitValueSets.crend()) {
-        // All entries are empty, so the entire baseName can be forgotten.
-        UniqueSeeds.erase(baseName);
-    }
-    else {
-        digitValueSets.resize(digitValueSets.crend() - lastNonemptyEntry);
-    }
-}
-
-bool Base::UniqueNameManager::containsName(const std::string& name) const
-{
-    std::string baseName;
-    std::string nameSuffix;
-    uint digitCount;
-    uint digitsValue;
-    std::tie(digitCount, digitsValue) = decomposeName(name, baseName, nameSuffix);
-    baseName += nameSuffix;
-    auto baseNameEntry = UniqueSeeds.find(baseName);
-    if (baseNameEntry == UniqueSeeds.end()) {
-        // base name is not registered
-        return false;
-    }
-    if (digitCount >= baseNameEntry->second.size()) {
-        // First use of this digitCount, name must not be registered, so not in collection
-        return false;
-    }
-    return baseNameEntry->second[digitCount].Contains(digitsValue);
-}
 std::string Base::Tools::getIdentifier(const std::string& name)
 {
    if (name.empty()) {
--- a/src/Base/Tools.h
+++ b/src/Base/Tools.h
@@ -33,7 +33,6 @@
 #include <iostream>
 #include <vector>
 #include <string>
-#include <set>
 #include <boost_signals2.hpp>
 #include <QString>

@@ -265,100 +264,11 @@ public:

 // ----------------------------------------------------------------------------

-
-class BaseExport UniqueNameManager
-{
-protected:
-    // This method returns the position of the start of the suffix (or name.cend() if no
-    // suffix). It must return the same suffix lentgh (name.size() - returnValue) for both
-    // unique names (one containing digits) and the corresponding base name (with no digits).
-    virtual std::string::const_iterator GetNameSuffixStartPosition(const std::string& name) const
-    {
-        return name.cend();
-    }
-
-private:
-    class PiecewiseSparseIntegerSet
-    {
-    public:
-        PiecewiseSparseIntegerSet()
-        {}
-
-    private:
-        // Each pair being <lowest, count> represents the span of integers from lowest to
-        // (lowest+count-1) inclusive
-        using etype = std::pair<uint, uint>;
-        // This span comparer class is analogous to std::less and treats overlapping spans as being
-        // neither greater nor less than each other
-        class comparer
-        {
-        public:
-            bool operator()(const etype& lhs, const etype& rhs) const
-            {
-                // The equality case here is when lhs is below and directly adjacent to rhs.
-                return rhs.first - lhs.first >= lhs.second;
-            }
-        };
-        // Spans is the set of spans. Adjacent spans are coalesced so there are always gaps between
-        // the entries.
-        std::set<etype, comparer> Spans;
-        using iterator = typename std::set<etype, comparer>::iterator;
-        using const_iterator = typename std::set<etype, comparer>::const_iterator;
-
-    public:
-        void Add(uint value);
-        void Remove(uint value);
-        bool Contains(uint value) const;
-        bool Any() const
-        {
-            return Spans.size() != 0;
-        }
-        void Clear()
-        {
-            Spans.clear();
-        }
-        uint Next() const
-        {
-            if (Spans.size() == 0) {
-                return 0;
-            }
-            iterator last = Spans.end();
-            --last;
-            return last->first + last->second;
-        }
-    };
-    // Keyed as UniqueSeeds[baseName][digitCount][digitValue] iff that seed is taken.
-    // We need the double-indexing so that Name01 and Name001 can both be indexed, although we only
-    // ever allocate off the longest for each name i.e. UniqueSeeds[baseName].size()-1 digits.
-    std::map<std::string, std::vector<PiecewiseSparseIntegerSet>> UniqueSeeds;
-
-public:
-    std::tuple<uint, uint> decomposeName(const std::string& name,
-                                         std::string& baseNameOut,
-                                         std::string& nameSuffixOut) const;
-
-    UniqueNameManager()
-    {}
-
-    // Register a name in the collection. It is an error (detected only by assertions) to register a
-    // name more than once. The effect if undetected is that the second registration will have no
-    // effect
-    void addExactName(const std::string& name);
-    std::string makeUniqueName(const std::string& modelName, std::size_t minDigits = 0) const;
-
-    // Remove a registered name so it can be generated again.
-    // Nothing happens if you try to remove a non-registered name.
-    void removeExactName(const std::string& name);
-
-    bool containsName(const std::string& name) const;
-
-    void clear()
-    {
-        UniqueSeeds.clear();
-    }
-};
 struct BaseExport Tools
 {
+    static std::string
+    getUniqueName(const std::string&, const std::vector<std::string>&, int d = 0);
+    static std::string addNumber(const std::string&, unsigned int, int d = 0);
    static std::string getIdentifier(const std::string&);
    static std::wstring widen(const std::string& str);
    static std::string narrow(const std::wstring& str);
--- a/src/Base/Writer.cpp
+++ b/src/Base/Writer.cpp
@@ -247,19 +247,56 @@ std::string Writer::addFile(const char* Name, const Base::Persistence* Object)
    assert(!isForceXML());

    FileEntry temp;
-    temp.FileName = Name ? Name : "";
-    if (FileNameManager.containsName(temp.FileName)) {
-        temp.FileName = FileNameManager.makeUniqueName(temp.FileName);
-    }
+    temp.FileName = getUniqueFileName(Name);
    temp.Object = Object;

    FileList.push_back(temp);
-    FileNameManager.addExactName(temp.FileName);
+
+    FileNames.push_back(temp.FileName);

    // return the unique file name
    return temp.FileName;
 }

+std::string Writer::getUniqueFileName(const char* Name)
+{
+    // name in use?
+    std::string CleanName = (Name ? Name : "");
+    std::vector<std::string>::const_iterator pos;
+    pos = find(FileNames.begin(), FileNames.end(), CleanName);
+
+    if (pos == FileNames.end()) {
+        // if not, name is OK
+        return CleanName;
+    }
+
+    std::vector<std::string> names;
+    names.reserve(FileNames.size());
+    FileInfo fi(CleanName);
+    CleanName = fi.fileNamePure();
+    std::string ext = fi.extension();
+    for (pos = FileNames.begin(); pos != FileNames.end(); ++pos) {
+        fi.setFile(*pos);
+        std::string FileName = fi.fileNamePure();
+        if (fi.extension() == ext) {
+            names.push_back(FileName);
+        }
+    }
+
+    std::stringstream str;
+    str << Base::Tools::getUniqueName(CleanName, names);
+    if (!ext.empty()) {
+        str << "." << ext;
+    }
+
+    return str.str();
+}
+
+const std::vector<std::string>& Writer::getFilenames() const
+{
+    return FileNames;
+}
+
 void Writer::incInd()
 {
    if (indent < 1020) {
--- a/src/Base/Writer.h
+++ b/src/Base/Writer.h
@@ -39,8 +39,6 @@
 #include <zipios++/zipoutputstream.h>
 #include <zipios++/meta-iostreams.h>

-#include <Base/Tools.h>
-
 #include "FileInfo.h"


@@ -58,24 +56,6 @@ class Persistence;
 */
 class BaseExport Writer
 {
-private:
-    // This overrides UniqueNameManager's suffix-locating function so thet the last '.' and
-    // everything after it is considered suffix.
-    class UniqueFileNameManager: public UniqueNameManager
-    {
-    protected:
-        virtual std::string::const_iterator
-        GetNameSuffixStartPosition(const std::string& name) const override
-        {
-            // This is an awkward way to do this, because the FileInfo class only yields pieces of
-            // the path, not delimiter positions. We can't just use fi.extension().size() because
-            // both "xyz" and "xyz." would yield three; we need the length of the extension
-            // *including its delimiter* so we use the length difference between the fileName and
-            // fileNamePure.
-            FileInfo fi(name);
-            return name.end() - (fi.fileName().size() - fi.fileNamePure().size());
-        }
-    };

 public:
    Writer();
@@ -104,6 +84,8 @@ public:
    std::string addFile(const char* Name, const Base::Persistence* Object);
    /// process the requested file storing
    virtual void writeFiles() = 0;
+    /// get all registered file names
+    const std::vector<std::string>& getFilenames() const;
    /// Set mode
    void setMode(const std::string& mode);
    /// Set modes
@@ -169,13 +151,14 @@ public:
    std::string ObjectName;

 protected:
+    std::string getUniqueFileName(const char* Name);
    struct FileEntry
    {
        std::string FileName;
        const Base::Persistence* Object;
    };
    std::vector<FileEntry> FileList;
-    UniqueFileNameManager FileNameManager;
+    std::vector<std::string> FileNames;
    std::vector<std::string> Errors;
    std::set<std::string> Modes;